xref: /netbsd-src/sys/kern/sysv_msg.c (revision 5b84b3983f71fd20a534cfa5d1556623a8aaa717)
1 /*	$NetBSD: sysv_msg.c,v 1.39 2005/04/01 11:59:37 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Implementation of SVID messages
42  *
43  * Author: Daniel Boulet
44  *
45  * Copyright 1993 Daniel Boulet and RTMX Inc.
46  *
47  * This system call was implemented by Daniel Boulet under contract from RTMX.
48  *
49  * Redistribution and use in source forms, with and without modification,
50  * are permitted provided that this entire comment appears intact.
51  *
52  * Redistribution in binary form may occur without any restrictions.
53  * Obviously, it would be nice if you gave credit where credit is due
54  * but requiring it would be too onerous.
55  *
56  * This software is provided ``AS IS'' without any warranties of any kind.
57  */
58 
59 #include <sys/cdefs.h>
60 __KERNEL_RCSID(0, "$NetBSD: sysv_msg.c,v 1.39 2005/04/01 11:59:37 yamt Exp $");
61 
62 #define SYSVMSG
63 
64 #include <sys/param.h>
65 #include <sys/kernel.h>
66 #include <sys/msg.h>
67 #include <sys/sysctl.h>
68 #include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
69 #include <sys/sa.h>
70 #include <sys/syscallargs.h>
71 
/*
 * Debug tracing.
 *
 * MSG_PRINTF() takes ONE argument: a complete, parenthesised printf
 * argument list, e.g. MSG_PRINTF(("value=%d\n", value)).  It forwards to
 * printf only when MSG_DEBUG_OK is defined; since MSG_DEBUG_OK is
 * explicitly undefined just below, the trace calls expand to nothing.
 */
#define MSG_DEBUG
#undef MSG_DEBUG_OK

#ifndef MSG_DEBUG_OK
#define MSG_PRINTF(a)
#else
#define MSG_PRINTF(a)	printf a
#endif
80 
/*
 * Backing storage.  msginit() points these at consecutive regions of a
 * single kernel allocation.
 */
static char	*msgpool;		/* message data, in msgssz-byte segments */
static struct	msgmap *msgmaps;	/* msginfo.msgseg segment map entries */
static struct	__msg *msghdrs;		/* msginfo.msgtql message headers */
struct	msqid_ds *msqids;		/* msginfo.msgmni queue descriptors */

/*
 * Free lists threaded through the arrays above.
 */
static short	free_msgmaps;		/* index of first free map entry */
static int	nfree_msgmaps;		/* number of free map entries */
static struct	__msg *free_msghdrs;	/* first free message header */

static void msg_freehdr(struct __msg *);
90 
91 void
92 msginit()
93 {
94 	int i, sz;
95 	vaddr_t v;
96 
97 	/*
98 	 * msginfo.msgssz should be a power of two for efficiency reasons.
99 	 * It is also pretty silly if msginfo.msgssz is less than 8
100 	 * or greater than about 256 so ...
101 	 */
102 
103 	i = 8;
104 	while (i < 1024 && i != msginfo.msgssz)
105 		i <<= 1;
106     	if (i != msginfo.msgssz) {
107 		MSG_PRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
108 		    msginfo.msgssz));
109 		panic("msginfo.msgssz not a small power of 2");
110 	}
111 
112 	if (msginfo.msgseg > 32767) {
113 		MSG_PRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
114 		panic("msginfo.msgseg > 32767");
115 	}
116 
117 	/* Allocate pageable memory for our structures */
118 	sz = msginfo.msgmax
119 		+ msginfo.msgseg * sizeof(struct msgmap)
120 		+ msginfo.msgtql * sizeof(struct __msg)
121 		+ msginfo.msgmni * sizeof(struct msqid_ds);
122 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
123 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
124 	if (v == 0)
125 		panic("sysv_msg: cannot allocate memory");
126 	msgpool = (void *)v;
127 	msgmaps = (void *) (msgpool + msginfo.msgmax);
128 	msghdrs = (void *) (msgmaps + msginfo.msgseg);
129 	msqids = (void *) (msghdrs + msginfo.msgtql);
130 
131 	for (i = 0; i < msginfo.msgseg; i++) {
132 		if (i > 0)
133 			msgmaps[i-1].next = i;
134 		msgmaps[i].next = -1;	/* implies entry is available */
135 	}
136 	free_msgmaps = 0;
137 	nfree_msgmaps = msginfo.msgseg;
138 
139 	if (msghdrs == NULL)
140 		panic("msghdrs is NULL");
141 
142 	for (i = 0; i < msginfo.msgtql; i++) {
143 		msghdrs[i].msg_type = 0;
144 		if (i > 0)
145 			msghdrs[i-1].msg_next = &msghdrs[i];
146 		msghdrs[i].msg_next = NULL;
147     	}
148 	free_msghdrs = &msghdrs[0];
149 
150 	if (msqids == NULL)
151 		panic("msqids is NULL");
152 
153 	for (i = 0; i < msginfo.msgmni; i++) {
154 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
155 		msqids[i].msg_perm._seq = 0;	/* reset to a known value */
156 	}
157 }
158 
159 static void
160 msg_freehdr(msghdr)
161 	struct __msg *msghdr;
162 {
163 	while (msghdr->msg_ts > 0) {
164 		short next;
165 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
166 			panic("msghdr->msg_spot out of range");
167 		next = msgmaps[msghdr->msg_spot].next;
168 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
169 		free_msgmaps = msghdr->msg_spot;
170 		nfree_msgmaps++;
171 		msghdr->msg_spot = next;
172 		if (msghdr->msg_ts >= msginfo.msgssz)
173 			msghdr->msg_ts -= msginfo.msgssz;
174 		else
175 			msghdr->msg_ts = 0;
176 	}
177 	if (msghdr->msg_spot != -1)
178 		panic("msghdr->msg_spot != -1");
179 	msghdr->msg_next = free_msghdrs;
180 	free_msghdrs = msghdr;
181 }
182 
183 int
184 sys___msgctl13(l, v, retval)
185 	struct lwp *l;
186 	void *v;
187 	register_t *retval;
188 {
189 	struct sys___msgctl13_args /* {
190 		syscallarg(int) msqid;
191 		syscallarg(int) cmd;
192 		syscallarg(struct msqid_ds *) buf;
193 	} */ *uap = v;
194 	struct proc *p = l->l_proc;
195 	struct msqid_ds msqbuf;
196 	int cmd, error;
197 
198 	cmd = SCARG(uap, cmd);
199 
200 	if (cmd == IPC_SET) {
201 		error = copyin(SCARG(uap, buf), &msqbuf, sizeof(msqbuf));
202 		if (error)
203 			return (error);
204 	}
205 
206 	error = msgctl1(p, SCARG(uap, msqid), cmd,
207 	    (cmd == IPC_SET || cmd == IPC_STAT) ? &msqbuf : NULL);
208 
209 	if (error == 0 && cmd == IPC_STAT)
210 		error = copyout(&msqbuf, SCARG(uap, buf), sizeof(msqbuf));
211 
212 	return (error);
213 }
214 
215 int
216 msgctl1(p, msqid, cmd, msqbuf)
217 	struct proc *p;
218 	int msqid;
219 	int cmd;
220 	struct msqid_ds *msqbuf;
221 {
222 	struct ucred *cred = p->p_ucred;
223 	struct msqid_ds *msqptr;
224 	int error = 0, ix;
225 
226 	MSG_PRINTF(("call to msgctl1(%d, %d)\n", msqid, cmd));
227 
228 	ix = IPCID_TO_IX(msqid);
229 
230 	if (ix < 0 || ix >= msginfo.msgmni) {
231 		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", ix,
232 		    msginfo.msgmni));
233 		return (EINVAL);
234 	}
235 
236 	msqptr = &msqids[ix];
237 
238 	if (msqptr->msg_qbytes == 0) {
239 		MSG_PRINTF(("no such msqid\n"));
240 		return (EINVAL);
241 	}
242 	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqid)) {
243 		MSG_PRINTF(("wrong sequence number\n"));
244 		return (EINVAL);
245 	}
246 
247 	switch (cmd) {
248 	case IPC_RMID:
249 	{
250 		struct __msg *msghdr;
251 		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)) != 0)
252 			return (error);
253 		/* Free the message headers */
254 		msghdr = msqptr->_msg_first;
255 		while (msghdr != NULL) {
256 			struct __msg *msghdr_tmp;
257 
258 			/* Free the segments of each message */
259 			msqptr->_msg_cbytes -= msghdr->msg_ts;
260 			msqptr->msg_qnum--;
261 			msghdr_tmp = msghdr;
262 			msghdr = msghdr->msg_next;
263 			msg_freehdr(msghdr_tmp);
264 		}
265 
266 		if (msqptr->_msg_cbytes != 0)
267 			panic("msg_cbytes is screwed up");
268 		if (msqptr->msg_qnum != 0)
269 			panic("msg_qnum is screwed up");
270 
271 		msqptr->msg_qbytes = 0;	/* Mark it as free */
272 
273 		wakeup(msqptr);
274 	}
275 		break;
276 
277 	case IPC_SET:
278 		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)))
279 			return (error);
280 		if (msqbuf->msg_qbytes > msqptr->msg_qbytes && cred->cr_uid != 0)
281 			return (EPERM);
282 		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
283 			MSG_PRINTF(("can't increase msg_qbytes beyond %d "
284 			    "(truncating)\n", msginfo.msgmnb));
285 			/* silently restrict qbytes to system limit */
286 			msqbuf->msg_qbytes = msginfo.msgmnb;
287 		}
288 		if (msqbuf->msg_qbytes == 0) {
289 			MSG_PRINTF(("can't reduce msg_qbytes to 0\n"));
290 			return (EINVAL);	/* XXX non-standard errno! */
291 		}
292 		msqptr->msg_perm.uid = msqbuf->msg_perm.uid;
293 		msqptr->msg_perm.gid = msqbuf->msg_perm.gid;
294 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
295 		    (msqbuf->msg_perm.mode & 0777);
296 		msqptr->msg_qbytes = msqbuf->msg_qbytes;
297 		msqptr->msg_ctime = time.tv_sec;
298 		break;
299 
300 	case IPC_STAT:
301 		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
302 			MSG_PRINTF(("requester doesn't have read access\n"));
303 			return (error);
304 		}
305 		memcpy(msqbuf, msqptr, sizeof(struct msqid_ds));
306 		break;
307 
308 	default:
309 		MSG_PRINTF(("invalid command %d\n", cmd));
310 		return (EINVAL);
311 	}
312 
313 	return (error);
314 }
315 
316 int
317 sys_msgget(l, v, retval)
318 	struct lwp *l;
319 	void *v;
320 	register_t *retval;
321 {
322 	struct sys_msgget_args /* {
323 		syscallarg(key_t) key;
324 		syscallarg(int) msgflg;
325 	} */ *uap = v;
326 	struct proc *p = l->l_proc;
327 	int msqid, error;
328 	int key = SCARG(uap, key);
329 	int msgflg = SCARG(uap, msgflg);
330 	struct ucred *cred = p->p_ucred;
331 	struct msqid_ds *msqptr = NULL;
332 
333 	MSG_PRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
334 
335 	if (key != IPC_PRIVATE) {
336 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
337 			msqptr = &msqids[msqid];
338 			if (msqptr->msg_qbytes != 0 &&
339 			    msqptr->msg_perm._key == key)
340 				break;
341 		}
342 		if (msqid < msginfo.msgmni) {
343 			MSG_PRINTF(("found public key\n"));
344 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
345 				MSG_PRINTF(("not exclusive\n"));
346 				return(EEXIST);
347 			}
348 			if ((error = ipcperm(cred, &msqptr->msg_perm,
349 			    msgflg & 0700 ))) {
350 				MSG_PRINTF(("requester doesn't have 0%o access\n",
351 				    msgflg & 0700));
352 				return (error);
353 			}
354 			goto found;
355 		}
356 	}
357 
358 	MSG_PRINTF(("need to allocate the msqid_ds\n"));
359 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
360 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
361 			/*
362 			 * Look for an unallocated and unlocked msqid_ds.
363 			 * msqid_ds's can be locked by msgsnd or msgrcv while
364 			 * they are copying the message in/out.  We can't
365 			 * re-use the entry until they release it.
366 			 */
367 			msqptr = &msqids[msqid];
368 			if (msqptr->msg_qbytes == 0 &&
369 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
370 				break;
371 		}
372 		if (msqid == msginfo.msgmni) {
373 			MSG_PRINTF(("no more msqid_ds's available\n"));
374 			return (ENOSPC);
375 		}
376 		MSG_PRINTF(("msqid %d is available\n", msqid));
377 		msqptr->msg_perm._key = key;
378 		msqptr->msg_perm.cuid = cred->cr_uid;
379 		msqptr->msg_perm.uid = cred->cr_uid;
380 		msqptr->msg_perm.cgid = cred->cr_gid;
381 		msqptr->msg_perm.gid = cred->cr_gid;
382 		msqptr->msg_perm.mode = (msgflg & 0777);
383 		/* Make sure that the returned msqid is unique */
384 		msqptr->msg_perm._seq++;
385 		msqptr->_msg_first = NULL;
386 		msqptr->_msg_last = NULL;
387 		msqptr->_msg_cbytes = 0;
388 		msqptr->msg_qnum = 0;
389 		msqptr->msg_qbytes = msginfo.msgmnb;
390 		msqptr->msg_lspid = 0;
391 		msqptr->msg_lrpid = 0;
392 		msqptr->msg_stime = 0;
393 		msqptr->msg_rtime = 0;
394 		msqptr->msg_ctime = time.tv_sec;
395 	} else {
396 		MSG_PRINTF(("didn't find it and wasn't asked to create it\n"));
397 		return (ENOENT);
398 	}
399 
400  found:
401 	/* Construct the unique msqid */
402 	*retval = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
403 	return (0);
404 }
405 
406 int
407 sys_msgsnd(l, v, retval)
408 	struct lwp *l;
409 	void *v;
410 	register_t *retval;
411 {
412 	struct sys_msgsnd_args /* {
413 		syscallarg(int) msqid;
414 		syscallarg(const void *) msgp;
415 		syscallarg(size_t) msgsz;
416 		syscallarg(int) msgflg;
417 	} */ *uap = v;
418 	struct proc *p = l->l_proc;
419 	int msqid = SCARG(uap, msqid);
420 	const char *user_msgp = SCARG(uap, msgp);
421 	size_t msgsz = SCARG(uap, msgsz);
422 	int msgflg = SCARG(uap, msgflg);
423 	int segs_needed, error;
424 	struct ucred *cred = p->p_ucred;
425 	struct msqid_ds *msqptr;
426 	struct __msg *msghdr;
427 	short next;
428 
429 	MSG_PRINTF(("call to msgsnd(%d, %p, %lld, %d)\n", msqid, user_msgp,
430 	    (long long)msgsz, msgflg));
431 
432 	msqid = IPCID_TO_IX(msqid);
433 
434 	if (msqid < 0 || msqid >= msginfo.msgmni) {
435 		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
436 		    msginfo.msgmni));
437 		return (EINVAL);
438 	}
439 
440 	msqptr = &msqids[msqid];
441 	if (msqptr->msg_qbytes == 0) {
442 		MSG_PRINTF(("no such message queue id\n"));
443 		return (EINVAL);
444 	}
445 	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(SCARG(uap, msqid))) {
446 		MSG_PRINTF(("wrong sequence number\n"));
447 		return (EINVAL);
448 	}
449 
450 	if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_W))) {
451 		MSG_PRINTF(("requester doesn't have write access\n"));
452 		return (error);
453 	}
454 
455 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
456 	MSG_PRINTF(("msgsz=%lld, msgssz=%d, segs_needed=%d\n",
457 	    (long long)msgsz, msginfo.msgssz, segs_needed));
458 	for (;;) {
459 		int need_more_resources = 0;
460 
461 		/*
462 		 * check msgsz [cannot be negative since it is unsigned]
463 		 * (inside this loop in case msg_qbytes changes while we sleep)
464 		 */
465 
466 		if (msgsz > msqptr->msg_qbytes) {
467 			MSG_PRINTF(("msgsz > msqptr->msg_qbytes\n"));
468 			return (EINVAL);
469 		}
470 
471 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
472 			MSG_PRINTF(("msqid is locked\n"));
473 			need_more_resources = 1;
474 		}
475 		if (msgsz + msqptr->_msg_cbytes > msqptr->msg_qbytes) {
476 			MSG_PRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
477 			need_more_resources = 1;
478 		}
479 		if (segs_needed > nfree_msgmaps) {
480 			MSG_PRINTF(("segs_needed > nfree_msgmaps\n"));
481 			need_more_resources = 1;
482 		}
483 		if (free_msghdrs == NULL) {
484 			MSG_PRINTF(("no more msghdrs\n"));
485 			need_more_resources = 1;
486 		}
487 
488 		if (need_more_resources) {
489 			int we_own_it;
490 
491 			if ((msgflg & IPC_NOWAIT) != 0) {
492 				MSG_PRINTF(("need more resources but caller "
493 				    "doesn't want to wait\n"));
494 				return (EAGAIN);
495 			}
496 
497 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
498 				MSG_PRINTF(("we don't own the msqid_ds\n"));
499 				we_own_it = 0;
500 			} else {
501 				/* Force later arrivals to wait for our
502 				   request */
503 				MSG_PRINTF(("we own the msqid_ds\n"));
504 				msqptr->msg_perm.mode |= MSG_LOCKED;
505 				we_own_it = 1;
506 			}
507 			MSG_PRINTF(("goodnight\n"));
508 			error = tsleep(msqptr, (PZERO - 4) | PCATCH,
509 			    "msgwait", 0);
510 			MSG_PRINTF(("good morning, error=%d\n", error));
511 			if (we_own_it)
512 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
513 			if (error != 0) {
514 				MSG_PRINTF(("msgsnd: interrupted system "
515 				    "call\n"));
516 				return (EINTR);
517 			}
518 
519 			/*
520 			 * Make sure that the msq queue still exists
521 			 */
522 
523 			if (msqptr->msg_qbytes == 0) {
524 				MSG_PRINTF(("msqid deleted\n"));
525 				return (EIDRM);
526 			}
527 		} else {
528 			MSG_PRINTF(("got all the resources that we need\n"));
529 			break;
530 		}
531 	}
532 
533 	/*
534 	 * We have the resources that we need.
535 	 * Make sure!
536 	 */
537 
538 	if (msqptr->msg_perm.mode & MSG_LOCKED)
539 		panic("msg_perm.mode & MSG_LOCKED");
540 	if (segs_needed > nfree_msgmaps)
541 		panic("segs_needed > nfree_msgmaps");
542 	if (msgsz + msqptr->_msg_cbytes > msqptr->msg_qbytes)
543 		panic("msgsz + msg_cbytes > msg_qbytes");
544 	if (free_msghdrs == NULL)
545 		panic("no more msghdrs");
546 
547 	/*
548 	 * Re-lock the msqid_ds in case we page-fault when copying in the
549 	 * message
550 	 */
551 
552 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
553 		panic("msqid_ds is already locked");
554 	msqptr->msg_perm.mode |= MSG_LOCKED;
555 
556 	/*
557 	 * Allocate a message header
558 	 */
559 
560 	msghdr = free_msghdrs;
561 	free_msghdrs = msghdr->msg_next;
562 	msghdr->msg_spot = -1;
563 	msghdr->msg_ts = msgsz;
564 
565 	/*
566 	 * Allocate space for the message
567 	 */
568 
569 	while (segs_needed > 0) {
570 		if (nfree_msgmaps <= 0)
571 			panic("not enough msgmaps");
572 		if (free_msgmaps == -1)
573 			panic("nil free_msgmaps");
574 		next = free_msgmaps;
575 		if (next <= -1)
576 			panic("next too low #1");
577 		if (next >= msginfo.msgseg)
578 			panic("next out of range #1");
579 		MSG_PRINTF(("allocating segment %d to message\n", next));
580 		free_msgmaps = msgmaps[next].next;
581 		nfree_msgmaps--;
582 		msgmaps[next].next = msghdr->msg_spot;
583 		msghdr->msg_spot = next;
584 		segs_needed--;
585 	}
586 
587 	/*
588 	 * Copy in the message type
589 	 */
590 
591 	if ((error = copyin(user_msgp, &msghdr->msg_type,
592 	    sizeof(msghdr->msg_type))) != 0) {
593 		MSG_PRINTF(("error %d copying the message type\n", error));
594 		msg_freehdr(msghdr);
595 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
596 		wakeup(msqptr);
597 		return (error);
598 	}
599 	user_msgp += sizeof(msghdr->msg_type);
600 
601 	/*
602 	 * Validate the message type
603 	 */
604 
605 	if (msghdr->msg_type < 1) {
606 		msg_freehdr(msghdr);
607 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
608 		wakeup(msqptr);
609 		MSG_PRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
610 		return (EINVAL);
611 	}
612 
613 	/*
614 	 * Copy in the message body
615 	 */
616 
617 	next = msghdr->msg_spot;
618 	while (msgsz > 0) {
619 		size_t tlen;
620 		if (msgsz > msginfo.msgssz)
621 			tlen = msginfo.msgssz;
622 		else
623 			tlen = msgsz;
624 		if (next <= -1)
625 			panic("next too low #2");
626 		if (next >= msginfo.msgseg)
627 			panic("next out of range #2");
628 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
629 		    tlen)) != 0) {
630 			MSG_PRINTF(("error %d copying in message segment\n",
631 			    error));
632 			msg_freehdr(msghdr);
633 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
634 			wakeup(msqptr);
635 			return (error);
636 		}
637 		msgsz -= tlen;
638 		user_msgp += tlen;
639 		next = msgmaps[next].next;
640 	}
641 	if (next != -1)
642 		panic("didn't use all the msg segments");
643 
644 	/*
645 	 * We've got the message.  Unlock the msqid_ds.
646 	 */
647 
648 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
649 
650 	/*
651 	 * Make sure that the msqid_ds is still allocated.
652 	 */
653 
654 	if (msqptr->msg_qbytes == 0) {
655 		msg_freehdr(msghdr);
656 		wakeup(msqptr);
657 		return (EIDRM);
658 	}
659 
660 	/*
661 	 * Put the message into the queue
662 	 */
663 
664 	if (msqptr->_msg_first == NULL) {
665 		msqptr->_msg_first = msghdr;
666 		msqptr->_msg_last = msghdr;
667 	} else {
668 		msqptr->_msg_last->msg_next = msghdr;
669 		msqptr->_msg_last = msghdr;
670 	}
671 	msqptr->_msg_last->msg_next = NULL;
672 
673 	msqptr->_msg_cbytes += msghdr->msg_ts;
674 	msqptr->msg_qnum++;
675 	msqptr->msg_lspid = p->p_pid;
676 	msqptr->msg_stime = time.tv_sec;
677 
678 	wakeup(msqptr);
679 	return (0);
680 }
681 
682 int
683 sys_msgrcv(l, v, retval)
684 	struct lwp *l;
685 	void *v;
686 	register_t *retval;
687 {
688 	struct sys_msgrcv_args /* {
689 		syscallarg(int) msqid;
690 		syscallarg(void *) msgp;
691 		syscallarg(size_t) msgsz;
692 		syscallarg(long) msgtyp;
693 		syscallarg(int) msgflg;
694 	} */ *uap = v;
695 	struct proc *p = l->l_proc;
696 	int msqid = SCARG(uap, msqid);
697 	char *user_msgp = SCARG(uap, msgp);
698 	size_t msgsz = SCARG(uap, msgsz);
699 	long msgtyp = SCARG(uap, msgtyp);
700 	int msgflg = SCARG(uap, msgflg);
701 	size_t len;
702 	struct ucred *cred = p->p_ucred;
703 	struct msqid_ds *msqptr;
704 	struct __msg *msghdr;
705 	int error;
706 	short next;
707 
708 	MSG_PRINTF(("call to msgrcv(%d, %p, %lld, %ld, %d)\n", msqid,
709 	    user_msgp, (long long)msgsz, msgtyp, msgflg));
710 
711 	msqid = IPCID_TO_IX(msqid);
712 
713 	if (msqid < 0 || msqid >= msginfo.msgmni) {
714 		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
715 		    msginfo.msgmni));
716 		return (EINVAL);
717 	}
718 
719 	msqptr = &msqids[msqid];
720 	if (msqptr->msg_qbytes == 0) {
721 		MSG_PRINTF(("no such message queue id\n"));
722 		return (EINVAL);
723 	}
724 	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(SCARG(uap, msqid))) {
725 		MSG_PRINTF(("wrong sequence number\n"));
726 		return (EINVAL);
727 	}
728 
729 	if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
730 		MSG_PRINTF(("requester doesn't have read access\n"));
731 		return (error);
732 	}
733 
734 #if 0
735 	/* cannot happen, msgsz is unsigned */
736 	if (msgsz < 0) {
737 		MSG_PRINTF(("msgsz < 0\n"));
738 		return (EINVAL);
739 	}
740 #endif
741 
742 	msghdr = NULL;
743 	while (msghdr == NULL) {
744 		if (msgtyp == 0) {
745 			msghdr = msqptr->_msg_first;
746 			if (msghdr != NULL) {
747 				if (msgsz < msghdr->msg_ts &&
748 				    (msgflg & MSG_NOERROR) == 0) {
749 					MSG_PRINTF(("first message on the "
750 					    "queue is too big "
751 					    "(want %lld, got %d)\n",
752 					    (long long)msgsz, msghdr->msg_ts));
753 					return (E2BIG);
754 				}
755 				if (msqptr->_msg_first == msqptr->_msg_last) {
756 					msqptr->_msg_first = NULL;
757 					msqptr->_msg_last = NULL;
758 				} else {
759 					msqptr->_msg_first = msghdr->msg_next;
760 					if (msqptr->_msg_first == NULL)
761 						panic("msg_first/last screwed "
762 						    "up #1");
763 				}
764 			}
765 		} else {
766 			struct __msg *previous;
767 			struct __msg **prev;
768 
769 			for (previous = NULL, prev = &msqptr->_msg_first;
770 			     (msghdr = *prev) != NULL;
771 			     previous = msghdr, prev = &msghdr->msg_next) {
772 				/*
773 				 * Is this message's type an exact match or is
774 				 * this message's type less than or equal to
775 				 * the absolute value of a negative msgtyp?
776 				 * Note that the second half of this test can
777 				 * NEVER be true if msgtyp is positive since
778 				 * msg_type is always positive!
779 				 */
780 
781 				if (msgtyp == msghdr->msg_type ||
782 				    msghdr->msg_type <= -msgtyp) {
783 					MSG_PRINTF(("found message type %ld, "
784 					    "requested %ld\n",
785 					    msghdr->msg_type, msgtyp));
786 					if (msgsz < msghdr->msg_ts &&
787 					    (msgflg & MSG_NOERROR) == 0) {
788 						MSG_PRINTF(("requested message "
789 						    "on the queue is too big "
790 						    "(want %lld, got %d)\n",
791 						    (long long)msgsz,
792 						    msghdr->msg_ts));
793 						return (E2BIG);
794 					}
795 					*prev = msghdr->msg_next;
796 					if (msghdr == msqptr->_msg_last) {
797 						if (previous == NULL) {
798 							if (prev !=
799 							    &msqptr->_msg_first)
800 								panic("msg_first/last screwed up #2");
801 							msqptr->_msg_first =
802 							    NULL;
803 							msqptr->_msg_last =
804 							    NULL;
805 						} else {
806 							if (prev ==
807 							    &msqptr->_msg_first)
808 								panic("msg_first/last screwed up #3");
809 							msqptr->_msg_last =
810 							    previous;
811 						}
812 					}
813 					break;
814 				}
815 			}
816 		}
817 
818 		/*
819 		 * We've either extracted the msghdr for the appropriate
820 		 * message or there isn't one.
821 		 * If there is one then bail out of this loop.
822 		 */
823 
824 		if (msghdr != NULL)
825 			break;
826 
827 		/*
828 		 * Hmph!  No message found.  Does the user want to wait?
829 		 */
830 
831 		if ((msgflg & IPC_NOWAIT) != 0) {
832 			MSG_PRINTF(("no appropriate message found (msgtyp=%ld)\n",
833 			    msgtyp));
834 			/* The SVID says to return ENOMSG. */
835 #ifdef ENOMSG
836 			return (ENOMSG);
837 #else
838 			/* Unfortunately, BSD doesn't define that code yet! */
839 			return (EAGAIN);
840 #endif
841 		}
842 
843 		/*
844 		 * Wait for something to happen
845 		 */
846 
847 		MSG_PRINTF(("msgrcv:  goodnight\n"));
848 		error = tsleep(msqptr, (PZERO - 4) | PCATCH, "msgwait",
849 		    0);
850 		MSG_PRINTF(("msgrcv: good morning (error=%d)\n", error));
851 
852 		if (error != 0) {
853 			MSG_PRINTF(("msgsnd: interrupted system call\n"));
854 			return (EINTR);
855 		}
856 
857 		/*
858 		 * Make sure that the msq queue still exists
859 		 */
860 
861 		if (msqptr->msg_qbytes == 0 ||
862 		    msqptr->msg_perm._seq != IPCID_TO_SEQ(SCARG(uap, msqid))) {
863 			MSG_PRINTF(("msqid deleted\n"));
864 			return (EIDRM);
865 		}
866 	}
867 
868 	/*
869 	 * Return the message to the user.
870 	 *
871 	 * First, do the bookkeeping (before we risk being interrupted).
872 	 */
873 
874 	msqptr->_msg_cbytes -= msghdr->msg_ts;
875 	msqptr->msg_qnum--;
876 	msqptr->msg_lrpid = p->p_pid;
877 	msqptr->msg_rtime = time.tv_sec;
878 
879 	/*
880 	 * Make msgsz the actual amount that we'll be returning.
881 	 * Note that this effectively truncates the message if it is too long
882 	 * (since msgsz is never increased).
883 	 */
884 
885 	MSG_PRINTF(("found a message, msgsz=%lld, msg_ts=%d\n",
886 	    (long long)msgsz, msghdr->msg_ts));
887 	if (msgsz > msghdr->msg_ts)
888 		msgsz = msghdr->msg_ts;
889 
890 	/*
891 	 * Return the type to the user.
892 	 */
893 
894 	error = copyout(&msghdr->msg_type, user_msgp, sizeof(msghdr->msg_type));
895 	if (error != 0) {
896 		MSG_PRINTF(("error (%d) copying out message type\n", error));
897 		msg_freehdr(msghdr);
898 		wakeup(msqptr);
899 		return (error);
900 	}
901 	user_msgp += sizeof(msghdr->msg_type);
902 
903 	/*
904 	 * Return the segments to the user
905 	 */
906 
907 	next = msghdr->msg_spot;
908 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
909 		size_t tlen;
910 
911 		if (msgsz - len > msginfo.msgssz)
912 			tlen = msginfo.msgssz;
913 		else
914 			tlen = msgsz - len;
915 		if (next <= -1)
916 			panic("next too low #3");
917 		if (next >= msginfo.msgseg)
918 			panic("next out of range #3");
919 		error = copyout(&msgpool[next * msginfo.msgssz],
920 		    user_msgp, tlen);
921 		if (error != 0) {
922 			MSG_PRINTF(("error (%d) copying out message segment\n",
923 			    error));
924 			msg_freehdr(msghdr);
925 			wakeup(msqptr);
926 			return (error);
927 		}
928 		user_msgp += tlen;
929 		next = msgmaps[next].next;
930 	}
931 
932 	/*
933 	 * Done, return the actual number of bytes copied out.
934 	 */
935 
936 	msg_freehdr(msghdr);
937 	wakeup(msqptr);
938 	*retval = msgsz;
939 	return (0);
940 }
941