xref: /dflybsd-src/sys/kern/sysv_msg.c (revision 92b817bb6b0d8e22d601698e9d3864697def6575)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 /* $DragonFly: src/sys/kern/sysv_msg.c,v 1.18 2008/01/06 16:55:51 swildner Exp $ */
3 
4 /*
5  * Implementation of SVID messages
6  *
7  * Author:  Daniel Boulet
8  *
9  * Copyright 1993 Daniel Boulet and RTMX Inc.
10  *
11  * This system call was implemented by Daniel Boulet under contract from RTMX.
12  *
13  * Redistribution and use in source forms, with and without modification,
14  * are permitted provided that this entire comment appears intact.
15  *
16  * Redistribution in binary form may occur without any restrictions.
17  * Obviously, it would be nice if you gave credit where credit is due
18  * but requiring it would be too onerous.
19  *
20  * This software is provided ``AS IS'' without any warranties of any kind.
21  */
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/priv.h>
31 #include <sys/msg.h>
32 #include <sys/sysent.h>
33 #include <sys/sysctl.h>
34 #include <sys/malloc.h>
35 #include <sys/jail.h>
36 
37 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
38 
39 static void msginit (void *);
40 
41 #define MSG_DEBUG
42 #undef MSG_DEBUG_OK
43 
44 static void msg_freehdr (struct msg *msghdr);
45 
/*
 * Dispatch table used by sys_msgsys(): the caller-supplied 'which'
 * selector indexes this array, so the order is part of the interface
 * (0 = msgctl, 1 = msgget, 2 = msgsnd, 3 = msgrcv).
 *
 * XXX casting to (sy_call_t *) is bogus, as usual.
 */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
};
51 
/*
 * Kernel-side header describing one message.  Headers live in the
 * msghdrs array and are chained either on a queue (msg_first/msg_last of
 * a msqid_ds) or on the free_msghdrs list.  The message body itself is
 * stored in msgpool segments that are chained through msgmaps.
 */
struct msg {
	struct	msg *msg_next;	/* next msg in the chain (queue or free list) */
	long	msg_type;	/* type of this message */
				/* >0 -> type of this message */
				/* 0 -> free header (as set up by msginit) */
	u_short	msg_ts;		/* size of this message body, in bytes */
	short	msg_spot;	/* index of the first segment of the body */
				/* in msgmaps/msgpool, -1 -> no segments */
};
60 
61 
/*
 * Default limits used to initialize msginfo.  Each one (except MSGMAX,
 * which is derived) is only defined here if it has not already been
 * defined, e.g. by the kernel configuration.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 (msginit panics) */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* total size of the segment pool, in bytes */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
78 
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Very
 * small or very large segments make little sense, so msginit() in this
 * file checks that msgssz is a power of two between 8 and 1024 inclusive
 * (and panics if it isn't).
 *
 * The initializer is positional; the per-line comments give the meaning
 * of each value.  msgmax is recomputed by msginit() as msgseg * msgssz.
 */
struct msginfo msginfo = {
                MSGMAX,         /* max chars in a message */
                MSGMNI,         /* # of message queue identifiers */
                MSGMNB,         /* max chars in a queue */
                MSGTQL,         /* max messages in system */
                MSGSSZ,         /* size of a message segment */
				/* (must be a power of 2, 8..1024, see msginit) */
                MSGSEG          /* number of message segments */
};
98 
/*
 * Helpers for packing a queue slot index together with the slot's
 * sequence number into one msqid, and for unpacking it again
 * (specific to this implementation): the index occupies the low 16 bits
 * and the sequence number the high 16 bits.
 */
#define MSQID(ix,ds) \
	(((ix) & 0xffff) | ((((ds).msg_perm.seq) << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
106 
107 /*
108  * The rest of this file is specific to this particular implementation.
109  */
110 
/*
 * One entry per msgpool segment.  Entries are linked by index, either
 * into the chain of segments holding one message body or into the free
 * list headed by free_msgmaps.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> end of chain (no next segment) */
				/* 0..(MSGSEG-1) -> index of next segment */
};

/*
 * Flag kept in msqid_ds.msg_perm.mode (outside the 0777 permission bits)
 * while msgsnd owns the queue entry.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
				/* (index into msgmaps, -1 -> list empty) */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
static struct msqid_ds *msqids;	/* msginfo.msgmni msqid_ds struct's */
126 
127 static void
128 msginit(void *dummy)
129 {
130 	int i;
131 
132 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
133 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
134 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
135 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
136 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
137 
138 	/*
139 	 * msginfo.msgssz should be a power of two for efficiency reasons.
140 	 * It is also pretty silly if msginfo.msgssz is less than 8
141 	 * or greater than about 256 so ...
142 	 */
143 
144 	i = 8;
145 	while (i < 1024 && i != msginfo.msgssz)
146 		i <<= 1;
147     	if (i != msginfo.msgssz) {
148 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
149 		    msginfo.msgssz);
150 		panic("msginfo.msgssz not a small power of 2");
151 	}
152 
153 	if (msginfo.msgseg > 32767) {
154 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
155 		panic("msginfo.msgseg > 32767");
156 	}
157 
158 	if (msgmaps == NULL)
159 		panic("msgmaps is NULL");
160 
161 	for (i = 0; i < msginfo.msgseg; i++) {
162 		if (i > 0)
163 			msgmaps[i-1].next = i;
164 		msgmaps[i].next = -1;	/* implies entry is available */
165 	}
166 	free_msgmaps = 0;
167 	nfree_msgmaps = msginfo.msgseg;
168 
169 	if (msghdrs == NULL)
170 		panic("msghdrs is NULL");
171 
172 	for (i = 0; i < msginfo.msgtql; i++) {
173 		msghdrs[i].msg_type = 0;
174 		if (i > 0)
175 			msghdrs[i-1].msg_next = &msghdrs[i];
176 		msghdrs[i].msg_next = NULL;
177     	}
178 	free_msghdrs = &msghdrs[0];
179 
180 	if (msqids == NULL)
181 		panic("msqids is NULL");
182 
183 	for (i = 0; i < msginfo.msgmni; i++) {
184 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
185 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
186 		msqids[i].msg_perm.mode = 0;
187 	}
188 }
189 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
190 
191 /*
192  * Entry point for all MSG calls
193  *
194  * msgsys_args(int which, int a2, ...) (VARARGS)
195  *
196  * MPALMOSTSAFE
197  */
198 int
199 sys_msgsys(struct msgsys_args *uap)
200 {
201 	struct proc *p = curproc;
202 	unsigned int which = (unsigned int)uap->which;
203 	int error;
204 
205 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
206 		return (ENOSYS);
207 
208 	if (which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
209 		return (EINVAL);
210 	bcopy(&uap->a2, &uap->which,
211 	      sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
212 	get_mplock();
213 	error = (*msgcalls[which])(uap);
214 	rel_mplock();
215 	return (error);
216 }
217 
218 static void
219 msg_freehdr(struct msg *msghdr)
220 {
221 	while (msghdr->msg_ts > 0) {
222 		short next;
223 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
224 			panic("msghdr->msg_spot out of range");
225 		next = msgmaps[msghdr->msg_spot].next;
226 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
227 		free_msgmaps = msghdr->msg_spot;
228 		nfree_msgmaps++;
229 		msghdr->msg_spot = next;
230 		if (msghdr->msg_ts >= msginfo.msgssz)
231 			msghdr->msg_ts -= msginfo.msgssz;
232 		else
233 			msghdr->msg_ts = 0;
234 	}
235 	if (msghdr->msg_spot != -1)
236 		panic("msghdr->msg_spot != -1");
237 	msghdr->msg_next = free_msghdrs;
238 	free_msghdrs = msghdr;
239 }
240 
241 /*
242  * MPALMOSTSAFE
243  */
244 int
245 sys_msgctl(struct msgctl_args *uap)
246 {
247 	struct thread *td = curthread;
248 	struct proc *p = td->td_proc;
249 	int msqid = uap->msqid;
250 	int cmd = uap->cmd;
251 	struct msqid_ds *user_msqptr = uap->buf;
252 	int rval, eval;
253 	struct msqid_ds msqbuf;
254 	struct msqid_ds *msqptr;
255 
256 #ifdef MSG_DEBUG_OK
257 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
258 #endif
259 
260 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
261 		return (ENOSYS);
262 
263 	get_mplock();
264 	msqid = IPCID_TO_IX(msqid);
265 
266 	if (msqid < 0 || msqid >= msginfo.msgmni) {
267 #ifdef MSG_DEBUG_OK
268 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
269 		    msginfo.msgmni);
270 #endif
271 		eval = EINVAL;
272 		goto done;
273 	}
274 
275 	msqptr = &msqids[msqid];
276 
277 	if (msqptr->msg_qbytes == 0) {
278 #ifdef MSG_DEBUG_OK
279 		kprintf("no such msqid\n");
280 #endif
281 		eval = EINVAL;
282 		goto done;
283 	}
284 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
285 #ifdef MSG_DEBUG_OK
286 		kprintf("wrong sequence number\n");
287 #endif
288 		eval = EINVAL;
289 		goto done;
290 	}
291 
292 	rval = 0;
293 
294 	switch (cmd) {
295 	case IPC_RMID:
296 	{
297 		struct msg *msghdr;
298 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
299 			break;
300 		/* Free the message headers */
301 		msghdr = msqptr->msg_first;
302 		while (msghdr != NULL) {
303 			struct msg *msghdr_tmp;
304 
305 			/* Free the segments of each message */
306 			msqptr->msg_cbytes -= msghdr->msg_ts;
307 			msqptr->msg_qnum--;
308 			msghdr_tmp = msghdr;
309 			msghdr = msghdr->msg_next;
310 			msg_freehdr(msghdr_tmp);
311 		}
312 
313 		if (msqptr->msg_cbytes != 0)
314 			panic("msg_cbytes is screwed up");
315 		if (msqptr->msg_qnum != 0)
316 			panic("msg_qnum is screwed up");
317 
318 		msqptr->msg_qbytes = 0;	/* Mark it as free */
319 
320 		wakeup((caddr_t)msqptr);
321 	}
322 
323 		break;
324 
325 	case IPC_SET:
326 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
327 			break;
328 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
329 			break;
330 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
331 			eval = priv_check(td, PRIV_ROOT);
332 			if (eval)
333 				break;
334 		}
335 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
336 #ifdef MSG_DEBUG_OK
337 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
338 			    msginfo.msgmnb);
339 #endif
340 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
341 		}
342 		if (msqbuf.msg_qbytes == 0) {
343 #ifdef MSG_DEBUG_OK
344 			kprintf("can't reduce msg_qbytes to 0\n");
345 #endif
346 			eval = EINVAL;		/* non-standard errno! */
347 			break;
348 		}
349 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
350 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
351 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
352 					(msqbuf.msg_perm.mode & 0777);
353 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
354 		msqptr->msg_ctime = time_second;
355 		break;
356 
357 	case IPC_STAT:
358 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
359 #ifdef MSG_DEBUG_OK
360 			kprintf("requester doesn't have read access\n");
361 #endif
362 			eval = EINVAL;
363 			break;
364 		}
365 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
366 		break;
367 
368 	default:
369 #ifdef MSG_DEBUG_OK
370 		kprintf("invalid command %d\n", cmd);
371 #endif
372 		eval = EINVAL;
373 		break;
374 	}
375 done:
376 	rel_mplock();
377 	if (eval == 0)
378 		uap->sysmsg_result = rval;
379 	return(eval);
380 }
381 
382 /*
383  * MPALMOSTSAFE
384  */
385 int
386 sys_msgget(struct msgget_args *uap)
387 {
388 	struct proc *p = curproc;
389 	int msqid, eval;
390 	int key = uap->key;
391 	int msgflg = uap->msgflg;
392 	struct ucred *cred = p->p_ucred;
393 	struct msqid_ds *msqptr = NULL;
394 
395 #ifdef MSG_DEBUG_OK
396 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
397 #endif
398 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
399 		return (ENOSYS);
400 
401 	eval = 0;
402 	get_mplock();
403 
404 	if (key != IPC_PRIVATE) {
405 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
406 			msqptr = &msqids[msqid];
407 			if (msqptr->msg_qbytes != 0 &&
408 			    msqptr->msg_perm.key == key)
409 				break;
410 		}
411 		if (msqid < msginfo.msgmni) {
412 #ifdef MSG_DEBUG_OK
413 			kprintf("found public key\n");
414 #endif
415 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
416 #ifdef MSG_DEBUG_OK
417 				kprintf("not exclusive\n");
418 #endif
419 				eval = EEXIST;
420 				goto done;
421 			}
422 			if ((eval = ipcperm(p, &msqptr->msg_perm, msgflg & 0700 ))) {
423 #ifdef MSG_DEBUG_OK
424 				kprintf("requester doesn't have 0%o access\n",
425 				    msgflg & 0700);
426 #endif
427 				goto done;
428 			}
429 			goto done;
430 		}
431 	}
432 
433 #ifdef MSG_DEBUG_OK
434 	kprintf("need to allocate the msqid_ds\n");
435 #endif
436 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
437 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
438 			/*
439 			 * Look for an unallocated and unlocked msqid_ds.
440 			 * msqid_ds's can be locked by msgsnd or msgrcv while
441 			 * they are copying the message in/out.  We can't
442 			 * re-use the entry until they release it.
443 			 */
444 			msqptr = &msqids[msqid];
445 			if (msqptr->msg_qbytes == 0 &&
446 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
447 				break;
448 		}
449 		if (msqid == msginfo.msgmni) {
450 #ifdef MSG_DEBUG_OK
451 			kprintf("no more msqid_ds's available\n");
452 #endif
453 			eval = ENOSPC;
454 			goto done;
455 		}
456 #ifdef MSG_DEBUG_OK
457 		kprintf("msqid %d is available\n", msqid);
458 #endif
459 		msqptr->msg_perm.key = key;
460 		msqptr->msg_perm.cuid = cred->cr_uid;
461 		msqptr->msg_perm.uid = cred->cr_uid;
462 		msqptr->msg_perm.cgid = cred->cr_gid;
463 		msqptr->msg_perm.gid = cred->cr_gid;
464 		msqptr->msg_perm.mode = (msgflg & 0777);
465 		/* Make sure that the returned msqid is unique */
466 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
467 		msqptr->msg_first = NULL;
468 		msqptr->msg_last = NULL;
469 		msqptr->msg_cbytes = 0;
470 		msqptr->msg_qnum = 0;
471 		msqptr->msg_qbytes = msginfo.msgmnb;
472 		msqptr->msg_lspid = 0;
473 		msqptr->msg_lrpid = 0;
474 		msqptr->msg_stime = 0;
475 		msqptr->msg_rtime = 0;
476 		msqptr->msg_ctime = time_second;
477 	} else {
478 #ifdef MSG_DEBUG_OK
479 		kprintf("didn't find it and wasn't asked to create it\n");
480 #endif
481 		eval = ENOENT;
482 	}
483 
484 done:
485 	rel_mplock();
486 	/* Construct the unique msqid */
487 	if (eval == 0)
488 		uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
489 	return(eval);
490 }
491 
492 /*
493  * MPALMOSTSAFE
494  */
495 int
496 sys_msgsnd(struct msgsnd_args *uap)
497 {
498 	struct proc *p = curproc;
499 	int msqid = uap->msqid;
500 	void *user_msgp = uap->msgp;
501 	size_t msgsz = uap->msgsz;
502 	int msgflg = uap->msgflg;
503 	int segs_needed, eval;
504 	struct msqid_ds *msqptr;
505 	struct msg *msghdr;
506 	short next;
507 
508 #ifdef MSG_DEBUG_OK
509 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
510 	    msgflg);
511 #endif
512 
513 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
514 		return (ENOSYS);
515 
516 	get_mplock();
517 	msqid = IPCID_TO_IX(msqid);
518 
519 	if (msqid < 0 || msqid >= msginfo.msgmni) {
520 #ifdef MSG_DEBUG_OK
521 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
522 		    msginfo.msgmni);
523 #endif
524 		eval = EINVAL;
525 		goto done;
526 	}
527 
528 	msqptr = &msqids[msqid];
529 	if (msqptr->msg_qbytes == 0) {
530 #ifdef MSG_DEBUG_OK
531 		kprintf("no such message queue id\n");
532 #endif
533 		eval = EINVAL;
534 		goto done;
535 	}
536 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
537 #ifdef MSG_DEBUG_OK
538 		kprintf("wrong sequence number\n");
539 #endif
540 		eval = EINVAL;
541 		goto done;
542 	}
543 
544 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_W))) {
545 #ifdef MSG_DEBUG_OK
546 		kprintf("requester doesn't have write access\n");
547 #endif
548 		eval = eval;
549 		goto done;
550 	}
551 
552 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
553 #ifdef MSG_DEBUG_OK
554 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
555 	    segs_needed);
556 #endif
557 	for (;;) {
558 		int need_more_resources = 0;
559 
560 		/*
561 		 * check msgsz
562 		 * (inside this loop in case msg_qbytes changes while we sleep)
563 		 */
564 
565 		if (msgsz > msqptr->msg_qbytes) {
566 #ifdef MSG_DEBUG_OK
567 			kprintf("msgsz > msqptr->msg_qbytes\n");
568 #endif
569 			eval = EINVAL;
570 			goto done;
571 		}
572 
573 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
574 #ifdef MSG_DEBUG_OK
575 			kprintf("msqid is locked\n");
576 #endif
577 			need_more_resources = 1;
578 		}
579 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
580 #ifdef MSG_DEBUG_OK
581 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
582 #endif
583 			need_more_resources = 1;
584 		}
585 		if (segs_needed > nfree_msgmaps) {
586 #ifdef MSG_DEBUG_OK
587 			kprintf("segs_needed > nfree_msgmaps\n");
588 #endif
589 			need_more_resources = 1;
590 		}
591 		if (free_msghdrs == NULL) {
592 #ifdef MSG_DEBUG_OK
593 			kprintf("no more msghdrs\n");
594 #endif
595 			need_more_resources = 1;
596 		}
597 
598 		if (need_more_resources) {
599 			int we_own_it;
600 
601 			if ((msgflg & IPC_NOWAIT) != 0) {
602 #ifdef MSG_DEBUG_OK
603 				kprintf("need more resources but caller doesn't want to wait\n");
604 #endif
605 				eval = EAGAIN;
606 				goto done;
607 			}
608 
609 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
610 #ifdef MSG_DEBUG_OK
611 				kprintf("we don't own the msqid_ds\n");
612 #endif
613 				we_own_it = 0;
614 			} else {
615 				/* Force later arrivals to wait for our
616 				   request */
617 #ifdef MSG_DEBUG_OK
618 				kprintf("we own the msqid_ds\n");
619 #endif
620 				msqptr->msg_perm.mode |= MSG_LOCKED;
621 				we_own_it = 1;
622 			}
623 #ifdef MSG_DEBUG_OK
624 			kprintf("goodnight\n");
625 #endif
626 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
627 #ifdef MSG_DEBUG_OK
628 			kprintf("good morning, eval=%d\n", eval);
629 #endif
630 			if (we_own_it)
631 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
632 			if (eval != 0) {
633 #ifdef MSG_DEBUG_OK
634 				kprintf("msgsnd:  interrupted system call\n");
635 #endif
636 				eval = EINTR;
637 				goto done;
638 			}
639 
640 			/*
641 			 * Make sure that the msq queue still exists
642 			 */
643 
644 			if (msqptr->msg_qbytes == 0) {
645 #ifdef MSG_DEBUG_OK
646 				kprintf("msqid deleted\n");
647 #endif
648 				eval = EIDRM;
649 				goto done;
650 			}
651 
652 		} else {
653 #ifdef MSG_DEBUG_OK
654 			kprintf("got all the resources that we need\n");
655 #endif
656 			break;
657 		}
658 	}
659 
660 	/*
661 	 * We have the resources that we need.
662 	 * Make sure!
663 	 */
664 
665 	if (msqptr->msg_perm.mode & MSG_LOCKED)
666 		panic("msg_perm.mode & MSG_LOCKED");
667 	if (segs_needed > nfree_msgmaps)
668 		panic("segs_needed > nfree_msgmaps");
669 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
670 		panic("msgsz + msg_cbytes > msg_qbytes");
671 	if (free_msghdrs == NULL)
672 		panic("no more msghdrs");
673 
674 	/*
675 	 * Re-lock the msqid_ds in case we page-fault when copying in the
676 	 * message
677 	 */
678 
679 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
680 		panic("msqid_ds is already locked");
681 	msqptr->msg_perm.mode |= MSG_LOCKED;
682 
683 	/*
684 	 * Allocate a message header
685 	 */
686 
687 	msghdr = free_msghdrs;
688 	free_msghdrs = msghdr->msg_next;
689 	msghdr->msg_spot = -1;
690 	msghdr->msg_ts = msgsz;
691 
692 	/*
693 	 * Allocate space for the message
694 	 */
695 
696 	while (segs_needed > 0) {
697 		if (nfree_msgmaps <= 0)
698 			panic("not enough msgmaps");
699 		if (free_msgmaps == -1)
700 			panic("nil free_msgmaps");
701 		next = free_msgmaps;
702 		if (next <= -1)
703 			panic("next too low #1");
704 		if (next >= msginfo.msgseg)
705 			panic("next out of range #1");
706 #ifdef MSG_DEBUG_OK
707 		kprintf("allocating segment %d to message\n", next);
708 #endif
709 		free_msgmaps = msgmaps[next].next;
710 		nfree_msgmaps--;
711 		msgmaps[next].next = msghdr->msg_spot;
712 		msghdr->msg_spot = next;
713 		segs_needed--;
714 	}
715 
716 	/*
717 	 * Copy in the message type
718 	 */
719 
720 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
721 	    sizeof(msghdr->msg_type))) != 0) {
722 #ifdef MSG_DEBUG_OK
723 		kprintf("error %d copying the message type\n", eval);
724 #endif
725 		msg_freehdr(msghdr);
726 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
727 		wakeup((caddr_t)msqptr);
728 		goto done;
729 	}
730 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
731 
732 	/*
733 	 * Validate the message type
734 	 */
735 
736 	if (msghdr->msg_type < 1) {
737 		msg_freehdr(msghdr);
738 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
739 		wakeup((caddr_t)msqptr);
740 #ifdef MSG_DEBUG_OK
741 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
742 #endif
743 		eval = EINVAL;
744 		goto done;
745 	}
746 
747 	/*
748 	 * Copy in the message body
749 	 */
750 
751 	next = msghdr->msg_spot;
752 	while (msgsz > 0) {
753 		size_t tlen;
754 		if (msgsz > msginfo.msgssz)
755 			tlen = msginfo.msgssz;
756 		else
757 			tlen = msgsz;
758 		if (next <= -1)
759 			panic("next too low #2");
760 		if (next >= msginfo.msgseg)
761 			panic("next out of range #2");
762 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
763 		    tlen)) != 0) {
764 #ifdef MSG_DEBUG_OK
765 			kprintf("error %d copying in message segment\n", eval);
766 #endif
767 			msg_freehdr(msghdr);
768 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
769 			wakeup((caddr_t)msqptr);
770 			goto done;
771 		}
772 		msgsz -= tlen;
773 		user_msgp = (char *)user_msgp + tlen;
774 		next = msgmaps[next].next;
775 	}
776 	if (next != -1)
777 		panic("didn't use all the msg segments");
778 
779 	/*
780 	 * We've got the message.  Unlock the msqid_ds.
781 	 */
782 
783 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
784 
785 	/*
786 	 * Make sure that the msqid_ds is still allocated.
787 	 */
788 
789 	if (msqptr->msg_qbytes == 0) {
790 		msg_freehdr(msghdr);
791 		wakeup((caddr_t)msqptr);
792 		eval = EIDRM;
793 		goto done;
794 	}
795 
796 	/*
797 	 * Put the message into the queue
798 	 */
799 
800 	if (msqptr->msg_first == NULL) {
801 		msqptr->msg_first = msghdr;
802 		msqptr->msg_last = msghdr;
803 	} else {
804 		msqptr->msg_last->msg_next = msghdr;
805 		msqptr->msg_last = msghdr;
806 	}
807 	msqptr->msg_last->msg_next = NULL;
808 
809 	msqptr->msg_cbytes += msghdr->msg_ts;
810 	msqptr->msg_qnum++;
811 	msqptr->msg_lspid = p->p_pid;
812 	msqptr->msg_stime = time_second;
813 
814 	wakeup((caddr_t)msqptr);
815 	eval = 0;
816 done:
817 	rel_mplock();
818 	if (eval == 0)
819 		uap->sysmsg_result = 0;
820 	return (eval);
821 }
822 
823 /*
824  * MPALMOSTSAFE
825  */
826 int
827 sys_msgrcv(struct msgrcv_args *uap)
828 {
829 	struct proc *p = curproc;
830 	int msqid = uap->msqid;
831 	void *user_msgp = uap->msgp;
832 	size_t msgsz = uap->msgsz;
833 	long msgtyp = uap->msgtyp;
834 	int msgflg = uap->msgflg;
835 	size_t len;
836 	struct msqid_ds *msqptr;
837 	struct msg *msghdr;
838 	int eval;
839 	short next;
840 
841 #ifdef MSG_DEBUG_OK
842 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
843 	    msgsz, msgtyp, msgflg);
844 #endif
845 
846 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
847 		return (ENOSYS);
848 
849 	get_mplock();
850 	msqid = IPCID_TO_IX(msqid);
851 
852 	if (msqid < 0 || msqid >= msginfo.msgmni) {
853 #ifdef MSG_DEBUG_OK
854 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
855 		    msginfo.msgmni);
856 #endif
857 		eval = EINVAL;
858 		goto done;
859 	}
860 
861 	msqptr = &msqids[msqid];
862 	if (msqptr->msg_qbytes == 0) {
863 #ifdef MSG_DEBUG_OK
864 		kprintf("no such message queue id\n");
865 #endif
866 		eval = EINVAL;
867 		goto done;
868 	}
869 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
870 #ifdef MSG_DEBUG_OK
871 		kprintf("wrong sequence number\n");
872 #endif
873 		eval = EINVAL;
874 		goto done;
875 	}
876 
877 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
878 #ifdef MSG_DEBUG_OK
879 		kprintf("requester doesn't have read access\n");
880 #endif
881 		goto done;
882 	}
883 
884 	msghdr = NULL;
885 	while (msghdr == NULL) {
886 		if (msgtyp == 0) {
887 			msghdr = msqptr->msg_first;
888 			if (msghdr != NULL) {
889 				if (msgsz < msghdr->msg_ts &&
890 				    (msgflg & MSG_NOERROR) == 0) {
891 #ifdef MSG_DEBUG_OK
892 					kprintf("first message on the queue is too big (want %d, got %d)\n",
893 					    msgsz, msghdr->msg_ts);
894 #endif
895 					eval = E2BIG;
896 					goto done;
897 				}
898 				if (msqptr->msg_first == msqptr->msg_last) {
899 					msqptr->msg_first = NULL;
900 					msqptr->msg_last = NULL;
901 				} else {
902 					msqptr->msg_first = msghdr->msg_next;
903 					if (msqptr->msg_first == NULL)
904 						panic("msg_first/last screwed up #1");
905 				}
906 			}
907 		} else {
908 			struct msg *previous;
909 			struct msg **prev;
910 
911 			previous = NULL;
912 			prev = &(msqptr->msg_first);
913 			while ((msghdr = *prev) != NULL) {
914 				/*
915 				 * Is this message's type an exact match or is
916 				 * this message's type less than or equal to
917 				 * the absolute value of a negative msgtyp?
918 				 * Note that the second half of this test can
919 				 * NEVER be true if msgtyp is positive since
920 				 * msg_type is always positive!
921 				 */
922 
923 				if (msgtyp == msghdr->msg_type ||
924 				    msghdr->msg_type <= -msgtyp) {
925 #ifdef MSG_DEBUG_OK
926 					kprintf("found message type %d, requested %d\n",
927 					    msghdr->msg_type, msgtyp);
928 #endif
929 					if (msgsz < msghdr->msg_ts &&
930 					    (msgflg & MSG_NOERROR) == 0) {
931 #ifdef MSG_DEBUG_OK
932 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
933 						    msgsz, msghdr->msg_ts);
934 #endif
935 						eval = E2BIG;
936 						goto done;
937 					}
938 					*prev = msghdr->msg_next;
939 					if (msghdr == msqptr->msg_last) {
940 						if (previous == NULL) {
941 							if (prev !=
942 							    &msqptr->msg_first)
943 								panic("msg_first/last screwed up #2");
944 							msqptr->msg_first =
945 							    NULL;
946 							msqptr->msg_last =
947 							    NULL;
948 						} else {
949 							if (prev ==
950 							    &msqptr->msg_first)
951 								panic("msg_first/last screwed up #3");
952 							msqptr->msg_last =
953 							    previous;
954 						}
955 					}
956 					break;
957 				}
958 				previous = msghdr;
959 				prev = &(msghdr->msg_next);
960 			}
961 		}
962 
963 		/*
964 		 * We've either extracted the msghdr for the appropriate
965 		 * message or there isn't one.
966 		 * If there is one then bail out of this loop.
967 		 */
968 
969 		if (msghdr != NULL)
970 			break;
971 
972 		/*
973 		 * Hmph!  No message found.  Does the user want to wait?
974 		 */
975 
976 		if ((msgflg & IPC_NOWAIT) != 0) {
977 #ifdef MSG_DEBUG_OK
978 			kprintf("no appropriate message found (msgtyp=%d)\n",
979 			    msgtyp);
980 #endif
981 			/* The SVID says to return ENOMSG. */
982 #ifdef ENOMSG
983 			eval = ENOMSG;
984 #else
985 			/* Unfortunately, BSD doesn't define that code yet! */
986 			eval = EAGAIN;
987 #endif
988 			goto done;
989 		}
990 
991 		/*
992 		 * Wait for something to happen
993 		 */
994 
995 #ifdef MSG_DEBUG_OK
996 		kprintf("msgrcv:  goodnight\n");
997 #endif
998 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
999 #ifdef MSG_DEBUG_OK
1000 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
1001 #endif
1002 
1003 		if (eval != 0) {
1004 #ifdef MSG_DEBUG_OK
1005 			kprintf("msgsnd:  interrupted system call\n");
1006 #endif
1007 			eval = EINTR;
1008 			goto done;
1009 		}
1010 
1011 		/*
1012 		 * Make sure that the msq queue still exists
1013 		 */
1014 
1015 		if (msqptr->msg_qbytes == 0 ||
1016 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1017 #ifdef MSG_DEBUG_OK
1018 			kprintf("msqid deleted\n");
1019 #endif
1020 			eval = EIDRM;
1021 			goto done;
1022 		}
1023 	}
1024 
1025 	/*
1026 	 * Return the message to the user.
1027 	 *
1028 	 * First, do the bookkeeping (before we risk being interrupted).
1029 	 */
1030 
1031 	msqptr->msg_cbytes -= msghdr->msg_ts;
1032 	msqptr->msg_qnum--;
1033 	msqptr->msg_lrpid = p->p_pid;
1034 	msqptr->msg_rtime = time_second;
1035 
1036 	/*
1037 	 * Make msgsz the actual amount that we'll be returning.
1038 	 * Note that this effectively truncates the message if it is too long
1039 	 * (since msgsz is never increased).
1040 	 */
1041 
1042 #ifdef MSG_DEBUG_OK
1043 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1044 	    msghdr->msg_ts);
1045 #endif
1046 	if (msgsz > msghdr->msg_ts)
1047 		msgsz = msghdr->msg_ts;
1048 
1049 	/*
1050 	 * Return the type to the user.
1051 	 */
1052 
1053 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1054 	    sizeof(msghdr->msg_type));
1055 	if (eval != 0) {
1056 #ifdef MSG_DEBUG_OK
1057 		kprintf("error (%d) copying out message type\n", eval);
1058 #endif
1059 		msg_freehdr(msghdr);
1060 		wakeup((caddr_t)msqptr);
1061 		goto done;
1062 	}
1063 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1064 
1065 	/*
1066 	 * Return the segments to the user
1067 	 */
1068 
1069 	next = msghdr->msg_spot;
1070 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1071 		size_t tlen;
1072 
1073 		if (msgsz - len > msginfo.msgssz)
1074 			tlen = msginfo.msgssz;
1075 		else
1076 			tlen = msgsz - len;
1077 		if (next <= -1)
1078 			panic("next too low #3");
1079 		if (next >= msginfo.msgseg)
1080 			panic("next out of range #3");
1081 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1082 		    user_msgp, tlen);
1083 		if (eval != 0) {
1084 #ifdef MSG_DEBUG_OK
1085 			kprintf("error (%d) copying out message segment\n",
1086 			    eval);
1087 #endif
1088 			msg_freehdr(msghdr);
1089 			wakeup((caddr_t)msqptr);
1090 			goto done;
1091 		}
1092 		user_msgp = (char *)user_msgp + tlen;
1093 		next = msgmaps[next].next;
1094 	}
1095 
1096 	/*
1097 	 * Done, return the actual number of bytes copied out.
1098 	 */
1099 
1100 	msg_freehdr(msghdr);
1101 	wakeup((caddr_t)msqptr);
1102 	eval = 0;
1103 done:
1104 	rel_mplock();
1105 	if (eval == 0)
1106 		uap->sysmsg_result = msgsz;
1107 	return(eval);
1108 }
1109 
1110 static int
1111 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1112 {
1113 
1114 	return (SYSCTL_OUT(req, msqids,
1115 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1116 }
1117 
/*
 * Tunables: only msgseg, msgssz and msgmni are registered with
 * TUNABLE_INT here; msgmax is derived from msgseg and msgssz by msginit().
 * msginit() panics if msgssz is not a power of two in 8..1024 or if
 * msgseg exceeds 32767.
 */
TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);

/*
 * kern.ipc.* sysctl nodes exposing the msginfo limits (all declared with
 * CTLFLAG_RD) and the raw queue table (kern.ipc.msqids, served by
 * sysctl_msqids()).
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1130