xref: /netbsd-src/sys/kern/sysv_sem.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /*	$NetBSD: sysv_sem.c,v 1.88 2011/07/30 06:19:02 uebayasi Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Implementation of SVID semaphores
35  *
36  * Author: Daniel Boulet
37  *
38  * This software is provided ``AS IS'' without any warranties of any kind.
39  */
40 
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.88 2011/07/30 06:19:02 uebayasi Exp $");
43 
44 #define SYSVSEM
45 
46 #include <sys/param.h>
47 #include <sys/kernel.h>
48 #include <sys/sem.h>
49 #include <sys/sysctl.h>
50 #include <sys/kmem.h>
51 #include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
52 #include <sys/syscallargs.h>
53 #include <sys/kauth.h>
54 
55 /*
56  * Memory areas:
57  *  1st: Pool of semaphore identifiers
58  *  2nd: Semaphores
59  *  3rd: Condition variables
60  *  4th: Undo structures
61  */
/* Table of semaphore set descriptors; seminfo.semmni entries. */
struct semid_ds *	sema			__read_mostly;
/* Backing array of individual semaphores; seminfo.semmns entries. */
static struct __sem *	sem			__read_mostly;
/* One condition variable per semaphore set, indexed like sema[]. */
static kcondvar_t *	semcv			__read_mostly;
/* Undo area: seminfo.semmnu slots of seminfo.semusz bytes (see SEMU). */
static int *		semu			__read_mostly;

/* Mutex held while the tables above and the undo list are used. */
static kmutex_t		semlock			__cacheline_aligned;
/* Set to true by semrealloc() while it is replacing the tables. */
static bool		sem_realloc_state	__read_mostly;
/* Broadcast when a reallocation ends and when a semop waiter wakes up. */
static kcondvar_t	sem_realloc_cv;
70 
71 /*
72  * List of active undo structures, total number of semaphores,
73  * and total number of semop waiters.
74  */
/* Head of the singly linked (un_next) list of undo structures in use. */
static struct sem_undo *semu_list		__read_mostly;
/* Number of semaphores currently handed out from sem[]. */
static u_int		semtot			__cacheline_aligned;
/* Number of semop callers currently asleep on a semcv[] entry. */
static u_int		sem_waiters		__cacheline_aligned;
78 
/*
 * Find a particular sem_undo vector: yields a pointer to slot ix of the
 * undo area that starts at s.  Slots are seminfo.semusz bytes apart.
 *
 * Both arguments are parenthesised so that an expression argument such as
 * SEMU(base, i + 1) addresses the intended slot instead of being torn
 * apart by operator precedence.
 */
#define SEMU(s, ix)	\
	((struct sem_undo *)((uintptr_t)(s) + (ix) * seminfo.semusz))
81 
/*
 * Debug tracing.  SEM_PRINTF((fmt, ...)) expands to a printf call when
 * SEM_DEBUG is defined and to nothing otherwise.  Callers use doubled
 * parentheses because the whole printf argument list is passed as the
 * single macro argument.
 */
#ifdef SEM_DEBUG
#define SEM_PRINTF(a) printf a
#else
#define SEM_PRINTF(a)
#endif
87 
/* Forward declarations of the undo helpers defined later in this file. */
struct sem_undo *semu_alloc(struct proc *);
int semundo_adjust(struct proc *, struct sem_undo **, int, int, int);
void semundo_clear(int, int);
91 
92 void
93 seminit(void)
94 {
95 	int i, sz;
96 	vaddr_t v;
97 
98 	mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE);
99 	cv_init(&sem_realloc_cv, "semrealc");
100 	sem_realloc_state = false;
101 	semtot = 0;
102 	sem_waiters = 0;
103 
104 	/* Allocate the wired memory for our structures */
105 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
106 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
107 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
108 	    ALIGN(seminfo.semmnu * seminfo.semusz);
109 	sz = round_page(sz);
110 	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
111 	if (v == 0)
112 		panic("sysv_sem: cannot allocate memory");
113 	sema = (void *)v;
114 	sem = (void *)((uintptr_t)sema +
115 	    ALIGN(seminfo.semmni * sizeof(struct semid_ds)));
116 	semcv = (void *)((uintptr_t)sem +
117 	    ALIGN(seminfo.semmns * sizeof(struct __sem)));
118 	semu = (void *)((uintptr_t)semcv +
119 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)));
120 
121 	for (i = 0; i < seminfo.semmni; i++) {
122 		sema[i]._sem_base = 0;
123 		sema[i].sem_perm.mode = 0;
124 		cv_init(&semcv[i], "semwait");
125 	}
126 	for (i = 0; i < seminfo.semmnu; i++) {
127 		struct sem_undo *suptr = SEMU(semu, i);
128 		suptr->un_proc = NULL;
129 	}
130 	semu_list = NULL;
131 	exithook_establish(semexit, NULL);
132 }
133 
134 static int
135 semrealloc(int newsemmni, int newsemmns, int newsemmnu)
136 {
137 	struct semid_ds *new_sema, *old_sema;
138 	struct __sem *new_sem;
139 	struct sem_undo *new_semu_list, *suptr, *nsuptr;
140 	int *new_semu;
141 	kcondvar_t *new_semcv;
142 	vaddr_t v;
143 	int i, j, lsemid, nmnus, sz;
144 
145 	if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1)
146 		return EINVAL;
147 
148 	/* Allocate the wired memory for our structures */
149 	sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
150 	    ALIGN(newsemmns * sizeof(struct __sem)) +
151 	    ALIGN(newsemmni * sizeof(kcondvar_t)) +
152 	    ALIGN(newsemmnu * seminfo.semusz);
153 	sz = round_page(sz);
154 	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
155 	if (v == 0)
156 		return ENOMEM;
157 
158 	mutex_enter(&semlock);
159 	if (sem_realloc_state) {
160 		mutex_exit(&semlock);
161 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
162 		return EBUSY;
163 	}
164 	sem_realloc_state = true;
165 	if (sem_waiters) {
166 		/*
167 		 * Mark reallocation state, wake-up all waiters,
168 		 * and wait while they will all exit.
169 		 */
170 		for (i = 0; i < seminfo.semmni; i++)
171 			cv_broadcast(&semcv[i]);
172 		while (sem_waiters)
173 			cv_wait(&sem_realloc_cv, &semlock);
174 	}
175 	old_sema = sema;
176 
177 	/* Get the number of last slot */
178 	lsemid = 0;
179 	for (i = 0; i < seminfo.semmni; i++)
180 		if (sema[i].sem_perm.mode & SEM_ALLOC)
181 			lsemid = i;
182 
183 	/* Get the number of currently used undo structures */
184 	nmnus = 0;
185 	for (i = 0; i < seminfo.semmnu; i++) {
186 		suptr = SEMU(semu, i);
187 		if (suptr->un_proc == NULL)
188 			continue;
189 		nmnus++;
190 	}
191 
192 	/* We cannot reallocate less memory than we use */
193 	if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
194 		mutex_exit(&semlock);
195 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
196 		return EBUSY;
197 	}
198 
199 	new_sema = (void *)v;
200 	new_sem = (void *)((uintptr_t)new_sema +
201 	    ALIGN(newsemmni * sizeof(struct semid_ds)));
202 	new_semcv = (void *)((uintptr_t)new_sem +
203 	    ALIGN(newsemmns * sizeof(struct __sem)));
204 	new_semu = (void *)((uintptr_t)new_semcv +
205 	    ALIGN(newsemmni * sizeof(kcondvar_t)));
206 
207 	/* Initialize all semaphore identifiers and condvars */
208 	for (i = 0; i < newsemmni; i++) {
209 		new_sema[i]._sem_base = 0;
210 		new_sema[i].sem_perm.mode = 0;
211 		cv_init(&new_semcv[i], "semwait");
212 	}
213 	for (i = 0; i < newsemmnu; i++) {
214 		nsuptr = SEMU(new_semu, i);
215 		nsuptr->un_proc = NULL;
216 	}
217 
218 	/*
219 	 * Copy all identifiers, semaphores and list of the
220 	 * undo structures to the new memory allocation.
221 	 */
222 	j = 0;
223 	for (i = 0; i <= lsemid; i++) {
224 		if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
225 			continue;
226 		memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
227 		new_sema[i]._sem_base = &new_sem[j];
228 		memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
229 		    (sizeof(struct __sem) * sema[i].sem_nsems));
230 		j += sema[i].sem_nsems;
231 	}
232 	KASSERT(j == semtot);
233 
234 	j = 0;
235 	new_semu_list = NULL;
236 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
237 		KASSERT(j < newsemmnu);
238 		nsuptr = SEMU(new_semu, j);
239 		memcpy(nsuptr, suptr, SEMUSZ);
240 		nsuptr->un_next = new_semu_list;
241 		new_semu_list = nsuptr;
242 		j++;
243 	}
244 
245 	for (i = 0; i < seminfo.semmni; i++) {
246 		KASSERT(cv_has_waiters(&semcv[i]) == false);
247 		cv_destroy(&semcv[i]);
248 	}
249 
250 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
251 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
252 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
253 	    ALIGN(seminfo.semmnu * seminfo.semusz);
254 	sz = round_page(sz);
255 
256 	/* Set the pointers and update the new values */
257 	sema = new_sema;
258 	sem = new_sem;
259 	semcv = new_semcv;
260 	semu = new_semu;
261 	semu_list = new_semu_list;
262 
263 	seminfo.semmni = newsemmni;
264 	seminfo.semmns = newsemmns;
265 	seminfo.semmnu = newsemmnu;
266 
267 	/* Reallocation completed - notify all waiters, if any */
268 	sem_realloc_state = false;
269 	cv_broadcast(&sem_realloc_cv);
270 	mutex_exit(&semlock);
271 
272 	uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED);
273 	return 0;
274 }
275 
276 /*
277  * Placebo.
278  */
279 
280 int
281 sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval)
282 {
283 
284 	*retval = 0;
285 	return 0;
286 }
287 
288 /*
289  * Allocate a new sem_undo structure for a process.
290  * => Returns NULL on failure.
291  */
292 struct sem_undo *
293 semu_alloc(struct proc *p)
294 {
295 	struct sem_undo *suptr, **supptr;
296 	bool attempted = false;
297 	int i;
298 
299 	KASSERT(mutex_owned(&semlock));
300 again:
301 	/* Look for a free structure. */
302 	for (i = 0; i < seminfo.semmnu; i++) {
303 		suptr = SEMU(semu, i);
304 		if (suptr->un_proc == NULL) {
305 			/* Found.  Fill it in and return. */
306 			suptr->un_next = semu_list;
307 			semu_list = suptr;
308 			suptr->un_cnt = 0;
309 			suptr->un_proc = p;
310 			return suptr;
311 		}
312 	}
313 
314 	/* Not found.  Attempt to free some structures. */
315 	if (!attempted) {
316 		bool freed = false;
317 
318 		attempted = true;
319 		supptr = &semu_list;
320 		while ((suptr = *supptr) != NULL) {
321 			if (suptr->un_cnt == 0)  {
322 				suptr->un_proc = NULL;
323 				*supptr = suptr->un_next;
324 				freed = true;
325 			} else {
326 				supptr = &suptr->un_next;
327 			}
328 		}
329 		if (freed) {
330 			goto again;
331 		}
332 	}
333 	return NULL;
334 }
335 
336 /*
337  * Adjust a particular entry for a particular proc
338  */
339 
340 int
341 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
342     int adjval)
343 {
344 	struct sem_undo *suptr;
345 	struct undo *sunptr;
346 	int i;
347 
348 	KASSERT(mutex_owned(&semlock));
349 
350 	/*
351 	 * Look for and remember the sem_undo if the caller doesn't
352 	 * provide it
353 	 */
354 
355 	suptr = *supptr;
356 	if (suptr == NULL) {
357 		for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
358 			if (suptr->un_proc == p)
359 				break;
360 
361 		if (suptr == NULL) {
362 			suptr = semu_alloc(p);
363 			if (suptr == NULL)
364 				return (ENOSPC);
365 		}
366 		*supptr = suptr;
367 	}
368 
369 	/*
370 	 * Look for the requested entry and adjust it (delete if
371 	 * adjval becomes 0).
372 	 */
373 	sunptr = &suptr->un_ent[0];
374 	for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
375 		if (sunptr->un_id != semid || sunptr->un_num != semnum)
376 			continue;
377 		sunptr->un_adjval += adjval;
378 		if (sunptr->un_adjval == 0) {
379 			suptr->un_cnt--;
380 			if (i < suptr->un_cnt)
381 				suptr->un_ent[i] =
382 				    suptr->un_ent[suptr->un_cnt];
383 		}
384 		return (0);
385 	}
386 
387 	/* Didn't find the right entry - create it */
388 	if (suptr->un_cnt == SEMUME)
389 		return (EINVAL);
390 
391 	sunptr = &suptr->un_ent[suptr->un_cnt];
392 	suptr->un_cnt++;
393 	sunptr->un_adjval = adjval;
394 	sunptr->un_id = semid;
395 	sunptr->un_num = semnum;
396 	return (0);
397 }
398 
399 void
400 semundo_clear(int semid, int semnum)
401 {
402 	struct sem_undo *suptr;
403 	struct undo *sunptr, *sunend;
404 
405 	KASSERT(mutex_owned(&semlock));
406 
407 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
408 		for (sunptr = &suptr->un_ent[0],
409 		    sunend = sunptr + suptr->un_cnt; sunptr < sunend;) {
410 			if (sunptr->un_id == semid) {
411 				if (semnum == -1 || sunptr->un_num == semnum) {
412 					suptr->un_cnt--;
413 					sunend--;
414 					if (sunptr != sunend)
415 						*sunptr = *sunend;
416 					if (semnum != -1)
417 						break;
418 					else
419 						continue;
420 				}
421 			}
422 			sunptr++;
423 		}
424 }
425 
426 int
427 sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap,
428     register_t *retval)
429 {
430 	/* {
431 		syscallarg(int) semid;
432 		syscallarg(int) semnum;
433 		syscallarg(int) cmd;
434 		syscallarg(union __semun *) arg;
435 	} */
436 	struct semid_ds sembuf;
437 	int cmd, error;
438 	void *pass_arg;
439 	union __semun karg;
440 
441 	cmd = SCARG(uap, cmd);
442 
443 	pass_arg = get_semctl_arg(cmd, &sembuf, &karg);
444 
445 	if (pass_arg) {
446 		error = copyin(SCARG(uap, arg), &karg, sizeof(karg));
447 		if (error)
448 			return error;
449 		if (cmd == IPC_SET) {
450 			error = copyin(karg.buf, &sembuf, sizeof(sembuf));
451 			if (error)
452 				return (error);
453 		}
454 	}
455 
456 	error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd,
457 	    pass_arg, retval);
458 
459 	if (error == 0 && cmd == IPC_STAT)
460 		error = copyout(&sembuf, karg.buf, sizeof(sembuf));
461 
462 	return (error);
463 }
464 
465 int
466 semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
467     register_t *retval)
468 {
469 	kauth_cred_t cred = l->l_cred;
470 	union __semun *arg = v;
471 	struct semid_ds *sembuf = v, *semaptr;
472 	int i, error, ix;
473 
474 	SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
475 	    semid, semnum, cmd, v));
476 
477 	mutex_enter(&semlock);
478 
479 	ix = IPCID_TO_IX(semid);
480 	if (ix < 0 || ix >= seminfo.semmni) {
481 		mutex_exit(&semlock);
482 		return (EINVAL);
483 	}
484 
485 	semaptr = &sema[ix];
486 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
487 	    semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) {
488 		mutex_exit(&semlock);
489 		return (EINVAL);
490 	}
491 
492 	switch (cmd) {
493 	case IPC_RMID:
494 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
495 			break;
496 		semaptr->sem_perm.cuid = kauth_cred_geteuid(cred);
497 		semaptr->sem_perm.uid = kauth_cred_geteuid(cred);
498 		semtot -= semaptr->sem_nsems;
499 		for (i = semaptr->_sem_base - sem; i < semtot; i++)
500 			sem[i] = sem[i + semaptr->sem_nsems];
501 		for (i = 0; i < seminfo.semmni; i++) {
502 			if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
503 			    sema[i]._sem_base > semaptr->_sem_base)
504 				sema[i]._sem_base -= semaptr->sem_nsems;
505 		}
506 		semaptr->sem_perm.mode = 0;
507 		semundo_clear(ix, -1);
508 		cv_broadcast(&semcv[ix]);
509 		break;
510 
511 	case IPC_SET:
512 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
513 			break;
514 		KASSERT(sembuf != NULL);
515 		semaptr->sem_perm.uid = sembuf->sem_perm.uid;
516 		semaptr->sem_perm.gid = sembuf->sem_perm.gid;
517 		semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
518 		    (sembuf->sem_perm.mode & 0777);
519 		semaptr->sem_ctime = time_second;
520 		break;
521 
522 	case IPC_STAT:
523 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
524 			break;
525 		KASSERT(sembuf != NULL);
526 		memcpy(sembuf, semaptr, sizeof(struct semid_ds));
527 		sembuf->sem_perm.mode &= 0777;
528 		break;
529 
530 	case GETNCNT:
531 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
532 			break;
533 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
534 			error = EINVAL;
535 			break;
536 		}
537 		*retval = semaptr->_sem_base[semnum].semncnt;
538 		break;
539 
540 	case GETPID:
541 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
542 			break;
543 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
544 			error = EINVAL;
545 			break;
546 		}
547 		*retval = semaptr->_sem_base[semnum].sempid;
548 		break;
549 
550 	case GETVAL:
551 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
552 			break;
553 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
554 			error = EINVAL;
555 			break;
556 		}
557 		*retval = semaptr->_sem_base[semnum].semval;
558 		break;
559 
560 	case GETALL:
561 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
562 			break;
563 		KASSERT(arg != NULL);
564 		for (i = 0; i < semaptr->sem_nsems; i++) {
565 			error = copyout(&semaptr->_sem_base[i].semval,
566 			    &arg->array[i], sizeof(arg->array[i]));
567 			if (error != 0)
568 				break;
569 		}
570 		break;
571 
572 	case GETZCNT:
573 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
574 			break;
575 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
576 			error = EINVAL;
577 			break;
578 		}
579 		*retval = semaptr->_sem_base[semnum].semzcnt;
580 		break;
581 
582 	case SETVAL:
583 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
584 			break;
585 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
586 			error = EINVAL;
587 			break;
588 		}
589 		KASSERT(arg != NULL);
590 		if ((unsigned int)arg->val > seminfo.semvmx) {
591 			error = ERANGE;
592 			break;
593 		}
594 		semaptr->_sem_base[semnum].semval = arg->val;
595 		semundo_clear(ix, semnum);
596 		cv_broadcast(&semcv[ix]);
597 		break;
598 
599 	case SETALL:
600 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
601 			break;
602 		KASSERT(arg != NULL);
603 		for (i = 0; i < semaptr->sem_nsems; i++) {
604 			unsigned short semval;
605 			error = copyin(&arg->array[i], &semval,
606 			    sizeof(arg->array[i]));
607 			if (error != 0)
608 				break;
609 			if ((unsigned int)semval > seminfo.semvmx) {
610 				error = ERANGE;
611 				break;
612 			}
613 			semaptr->_sem_base[i].semval = semval;
614 		}
615 		semundo_clear(ix, -1);
616 		cv_broadcast(&semcv[ix]);
617 		break;
618 
619 	default:
620 		error = EINVAL;
621 		break;
622 	}
623 
624 	mutex_exit(&semlock);
625 	return (error);
626 }
627 
628 int
629 sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval)
630 {
631 	/* {
632 		syscallarg(key_t) key;
633 		syscallarg(int) nsems;
634 		syscallarg(int) semflg;
635 	} */
636 	int semid, error = 0;
637 	int key = SCARG(uap, key);
638 	int nsems = SCARG(uap, nsems);
639 	int semflg = SCARG(uap, semflg);
640 	kauth_cred_t cred = l->l_cred;
641 
642 	SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
643 
644 	mutex_enter(&semlock);
645 
646 	if (key != IPC_PRIVATE) {
647 		for (semid = 0; semid < seminfo.semmni; semid++) {
648 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
649 			    sema[semid].sem_perm._key == key)
650 				break;
651 		}
652 		if (semid < seminfo.semmni) {
653 			SEM_PRINTF(("found public key\n"));
654 			if ((error = ipcperm(cred, &sema[semid].sem_perm,
655 			    semflg & 0700)))
656 			    	goto out;
657 			if (nsems > 0 && sema[semid].sem_nsems < nsems) {
658 				SEM_PRINTF(("too small\n"));
659 				error = EINVAL;
660 				goto out;
661 			}
662 			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
663 				SEM_PRINTF(("not exclusive\n"));
664 				error = EEXIST;
665 				goto out;
666 			}
667 			goto found;
668 		}
669 	}
670 
671 	SEM_PRINTF(("need to allocate the semid_ds\n"));
672 	if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
673 		if (nsems <= 0 || nsems > seminfo.semmsl) {
674 			SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
675 			    seminfo.semmsl));
676 			error = EINVAL;
677 			goto out;
678 		}
679 		if (nsems > seminfo.semmns - semtot) {
680 			SEM_PRINTF(("not enough semaphores left "
681 			    "(need %d, got %d)\n",
682 			    nsems, seminfo.semmns - semtot));
683 			error = ENOSPC;
684 			goto out;
685 		}
686 		for (semid = 0; semid < seminfo.semmni; semid++) {
687 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
688 				break;
689 		}
690 		if (semid == seminfo.semmni) {
691 			SEM_PRINTF(("no more semid_ds's available\n"));
692 			error = ENOSPC;
693 			goto out;
694 		}
695 		SEM_PRINTF(("semid %d is available\n", semid));
696 		sema[semid].sem_perm._key = key;
697 		sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred);
698 		sema[semid].sem_perm.uid = kauth_cred_geteuid(cred);
699 		sema[semid].sem_perm.cgid = kauth_cred_getegid(cred);
700 		sema[semid].sem_perm.gid = kauth_cred_getegid(cred);
701 		sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
702 		sema[semid].sem_perm._seq =
703 		    (sema[semid].sem_perm._seq + 1) & 0x7fff;
704 		sema[semid].sem_nsems = nsems;
705 		sema[semid].sem_otime = 0;
706 		sema[semid].sem_ctime = time_second;
707 		sema[semid]._sem_base = &sem[semtot];
708 		semtot += nsems;
709 		memset(sema[semid]._sem_base, 0,
710 		    sizeof(sema[semid]._sem_base[0]) * nsems);
711 		SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base,
712 		    &sem[semtot]));
713 	} else {
714 		SEM_PRINTF(("didn't find it and wasn't asked to create it\n"));
715 		error = ENOENT;
716 		goto out;
717 	}
718 
719  found:
720 	*retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
721  out:
722 	mutex_exit(&semlock);
723 	return (error);
724 }
725 
726 #define SMALL_SOPS 8
727 
728 int
729 sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval)
730 {
731 	/* {
732 		syscallarg(int) semid;
733 		syscallarg(struct sembuf *) sops;
734 		syscallarg(size_t) nsops;
735 	} */
736 	struct proc *p = l->l_proc;
737 	int semid = SCARG(uap, semid), seq;
738 	size_t nsops = SCARG(uap, nsops);
739 	struct sembuf small_sops[SMALL_SOPS];
740 	struct sembuf *sops;
741 	struct semid_ds *semaptr;
742 	struct sembuf *sopptr = NULL;
743 	struct __sem *semptr = NULL;
744 	struct sem_undo *suptr = NULL;
745 	kauth_cred_t cred = l->l_cred;
746 	int i, error;
747 	int do_wakeup, do_undos;
748 
749 	SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops));
750 
751 	if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) {
752 		mutex_enter(p->p_lock);
753 		p->p_flag |= PK_SYSVSEM;
754 		mutex_exit(p->p_lock);
755 	}
756 
757 restart:
758 	if (nsops <= SMALL_SOPS) {
759 		sops = small_sops;
760 	} else if (nsops <= seminfo.semopm) {
761 		sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
762 	} else {
763 		SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n",
764 		    seminfo.semopm, nsops));
765 		return (E2BIG);
766 	}
767 
768 	error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0]));
769 	if (error) {
770 		SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error,
771 		    SCARG(uap, sops), &sops, nsops * sizeof(sops[0])));
772 		if (sops != small_sops)
773 			kmem_free(sops, nsops * sizeof(*sops));
774 		return error;
775 	}
776 
777 	mutex_enter(&semlock);
778 	/* In case of reallocation, we will wait for completion */
779 	while (__predict_false(sem_realloc_state))
780 		cv_wait(&sem_realloc_cv, &semlock);
781 
782 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
783 	if (semid < 0 || semid >= seminfo.semmni) {
784 		error = EINVAL;
785 		goto out;
786 	}
787 
788 	semaptr = &sema[semid];
789 	seq = IPCID_TO_SEQ(SCARG(uap, semid));
790 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
791 	    semaptr->sem_perm._seq != seq) {
792 		error = EINVAL;
793 		goto out;
794 	}
795 
796 	if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
797 		SEM_PRINTF(("error = %d from ipaccess\n", error));
798 		goto out;
799 	}
800 
801 	for (i = 0; i < nsops; i++)
802 		if (sops[i].sem_num >= semaptr->sem_nsems) {
803 			error = EFBIG;
804 			goto out;
805 		}
806 
807 	/*
808 	 * Loop trying to satisfy the vector of requests.
809 	 * If we reach a point where we must wait, any requests already
810 	 * performed are rolled back and we go to sleep until some other
811 	 * process wakes us up.  At this point, we start all over again.
812 	 *
813 	 * This ensures that from the perspective of other tasks, a set
814 	 * of requests is atomic (never partially satisfied).
815 	 */
816 	do_undos = 0;
817 
818 	for (;;) {
819 		do_wakeup = 0;
820 
821 		for (i = 0; i < nsops; i++) {
822 			sopptr = &sops[i];
823 			semptr = &semaptr->_sem_base[sopptr->sem_num];
824 
825 			SEM_PRINTF(("semop:  semaptr=%p, sem_base=%p, "
826 			    "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
827 			    semaptr, semaptr->_sem_base, semptr,
828 			    sopptr->sem_num, semptr->semval, sopptr->sem_op,
829 			    (sopptr->sem_flg & IPC_NOWAIT) ?
830 			    "nowait" : "wait"));
831 
832 			if (sopptr->sem_op < 0) {
833 				if ((int)(semptr->semval +
834 				    sopptr->sem_op) < 0) {
835 					SEM_PRINTF(("semop:  "
836 					    "can't do it now\n"));
837 					break;
838 				} else {
839 					semptr->semval += sopptr->sem_op;
840 					if (semptr->semval == 0 &&
841 					    semptr->semzcnt > 0)
842 						do_wakeup = 1;
843 				}
844 				if (sopptr->sem_flg & SEM_UNDO)
845 					do_undos = 1;
846 			} else if (sopptr->sem_op == 0) {
847 				if (semptr->semval > 0) {
848 					SEM_PRINTF(("semop:  not zero now\n"));
849 					break;
850 				}
851 			} else {
852 				if (semptr->semncnt > 0)
853 					do_wakeup = 1;
854 				semptr->semval += sopptr->sem_op;
855 				if (sopptr->sem_flg & SEM_UNDO)
856 					do_undos = 1;
857 			}
858 		}
859 
860 		/*
861 		 * Did we get through the entire vector?
862 		 */
863 		if (i >= nsops)
864 			goto done;
865 
866 		/*
867 		 * No ... rollback anything that we've already done
868 		 */
869 		SEM_PRINTF(("semop:  rollback 0 through %d\n", i - 1));
870 		while (i-- > 0)
871 			semaptr->_sem_base[sops[i].sem_num].semval -=
872 			    sops[i].sem_op;
873 
874 		/*
875 		 * If the request that we couldn't satisfy has the
876 		 * NOWAIT flag set then return with EAGAIN.
877 		 */
878 		if (sopptr->sem_flg & IPC_NOWAIT) {
879 			error = EAGAIN;
880 			goto out;
881 		}
882 
883 		if (sopptr->sem_op == 0)
884 			semptr->semzcnt++;
885 		else
886 			semptr->semncnt++;
887 
888 		sem_waiters++;
889 		SEM_PRINTF(("semop:  good night!\n"));
890 		error = cv_wait_sig(&semcv[semid], &semlock);
891 		SEM_PRINTF(("semop:  good morning (error=%d)!\n", error));
892 		sem_waiters--;
893 
894 		/* Notify reallocator, if it is waiting */
895 		cv_broadcast(&sem_realloc_cv);
896 
897 		/*
898 		 * Make sure that the semaphore still exists
899 		 */
900 		if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
901 		    semaptr->sem_perm._seq != seq) {
902 			error = EIDRM;
903 			goto out;
904 		}
905 
906 		/*
907 		 * The semaphore is still alive.  Readjust the count of
908 		 * waiting processes.
909 		 */
910 		semptr = &semaptr->_sem_base[sopptr->sem_num];
911 		if (sopptr->sem_op == 0)
912 			semptr->semzcnt--;
913 		else
914 			semptr->semncnt--;
915 
916 		/* In case of such state, restart the call */
917 		if (sem_realloc_state) {
918 			mutex_exit(&semlock);
919 			goto restart;
920 		}
921 
922 		/* Is it really morning, or was our sleep interrupted? */
923 		if (error != 0) {
924 			error = EINTR;
925 			goto out;
926 		}
927 		SEM_PRINTF(("semop:  good morning!\n"));
928 	}
929 
930 done:
931 	/*
932 	 * Process any SEM_UNDO requests.
933 	 */
934 	if (do_undos) {
935 		for (i = 0; i < nsops; i++) {
936 			/*
937 			 * We only need to deal with SEM_UNDO's for non-zero
938 			 * op's.
939 			 */
940 			int adjval;
941 
942 			if ((sops[i].sem_flg & SEM_UNDO) == 0)
943 				continue;
944 			adjval = sops[i].sem_op;
945 			if (adjval == 0)
946 				continue;
947 			error = semundo_adjust(p, &suptr, semid,
948 			    sops[i].sem_num, -adjval);
949 			if (error == 0)
950 				continue;
951 
952 			/*
953 			 * Oh-Oh!  We ran out of either sem_undo's or undo's.
954 			 * Rollback the adjustments to this point and then
955 			 * rollback the semaphore ups and down so we can return
956 			 * with an error with all structures restored.  We
957 			 * rollback the undo's in the exact reverse order that
958 			 * we applied them.  This guarantees that we won't run
959 			 * out of space as we roll things back out.
960 			 */
961 			while (i-- > 0) {
962 				if ((sops[i].sem_flg & SEM_UNDO) == 0)
963 					continue;
964 				adjval = sops[i].sem_op;
965 				if (adjval == 0)
966 					continue;
967 				if (semundo_adjust(p, &suptr, semid,
968 				    sops[i].sem_num, adjval) != 0)
969 					panic("semop - can't undo undos");
970 			}
971 
972 			for (i = 0; i < nsops; i++)
973 				semaptr->_sem_base[sops[i].sem_num].semval -=
974 				    sops[i].sem_op;
975 
976 			SEM_PRINTF(("error = %d from semundo_adjust\n", error));
977 			goto out;
978 		} /* loop through the sops */
979 	} /* if (do_undos) */
980 
981 	/* We're definitely done - set the sempid's */
982 	for (i = 0; i < nsops; i++) {
983 		sopptr = &sops[i];
984 		semptr = &semaptr->_sem_base[sopptr->sem_num];
985 		semptr->sempid = p->p_pid;
986 	}
987 
988 	/* Update sem_otime */
989 	semaptr->sem_otime = time_second;
990 
991 	/* Do a wakeup if any semaphore was up'd. */
992 	if (do_wakeup) {
993 		SEM_PRINTF(("semop:  doing wakeup\n"));
994 		cv_broadcast(&semcv[semid]);
995 		SEM_PRINTF(("semop:  back from wakeup\n"));
996 	}
997 	SEM_PRINTF(("semop:  done\n"));
998 	*retval = 0;
999 
1000  out:
1001 	mutex_exit(&semlock);
1002 	if (sops != small_sops)
1003 		kmem_free(sops, nsops * sizeof(*sops));
1004 	return error;
1005 }
1006 
1007 /*
1008  * Go through the undo structures for this process and apply the
1009  * adjustments to semaphores.
1010  */
1011 /*ARGSUSED*/
1012 void
1013 semexit(struct proc *p, void *v)
1014 {
1015 	struct sem_undo *suptr;
1016 	struct sem_undo **supptr;
1017 
1018 	if ((p->p_flag & PK_SYSVSEM) == 0)
1019 		return;
1020 
1021 	mutex_enter(&semlock);
1022 
1023 	/*
1024 	 * Go through the chain of undo vectors looking for one
1025 	 * associated with this process.
1026 	 */
1027 
1028 	for (supptr = &semu_list; (suptr = *supptr) != NULL;
1029 	    supptr = &suptr->un_next) {
1030 		if (suptr->un_proc == p)
1031 			break;
1032 	}
1033 
1034 	/*
1035 	 * If there is no undo vector, skip to the end.
1036 	 */
1037 
1038 	if (suptr == NULL) {
1039 		mutex_exit(&semlock);
1040 		return;
1041 	}
1042 
1043 	/*
1044 	 * We now have an undo vector for this process.
1045 	 */
1046 
1047 	SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
1048 	    suptr->un_cnt));
1049 
1050 	/*
1051 	 * If there are any active undo elements then process them.
1052 	 */
1053 	if (suptr->un_cnt > 0) {
1054 		int ix;
1055 
1056 		for (ix = 0; ix < suptr->un_cnt; ix++) {
1057 			int semid = suptr->un_ent[ix].un_id;
1058 			int semnum = suptr->un_ent[ix].un_num;
1059 			int adjval = suptr->un_ent[ix].un_adjval;
1060 			struct semid_ds *semaptr;
1061 
1062 			semaptr = &sema[semid];
1063 			if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
1064 				panic("semexit - semid not allocated");
1065 			if (semnum >= semaptr->sem_nsems)
1066 				panic("semexit - semnum out of range");
1067 
1068 			SEM_PRINTF(("semexit:  %p id=%d num=%d(adj=%d) ; "
1069 			    "sem=%d\n",
1070 			    suptr->un_proc, suptr->un_ent[ix].un_id,
1071 			    suptr->un_ent[ix].un_num,
1072 			    suptr->un_ent[ix].un_adjval,
1073 			    semaptr->_sem_base[semnum].semval));
1074 
1075 			if (adjval < 0 &&
1076 			    semaptr->_sem_base[semnum].semval < -adjval)
1077 				semaptr->_sem_base[semnum].semval = 0;
1078 			else
1079 				semaptr->_sem_base[semnum].semval += adjval;
1080 
1081 			cv_broadcast(&semcv[semid]);
1082 			SEM_PRINTF(("semexit:  back from wakeup\n"));
1083 		}
1084 	}
1085 
1086 	/*
1087 	 * Deallocate the undo vector.
1088 	 */
1089 	SEM_PRINTF(("removing vector\n"));
1090 	suptr->un_proc = NULL;
1091 	*supptr = suptr->un_next;
1092 	mutex_exit(&semlock);
1093 }
1094 
1095 /*
1096  * Sysctl initialization and nodes.
1097  */
1098 
1099 static int
1100 sysctl_ipc_semmni(SYSCTLFN_ARGS)
1101 {
1102 	int newsize, error;
1103 	struct sysctlnode node;
1104 	node = *rnode;
1105 	node.sysctl_data = &newsize;
1106 
1107 	newsize = seminfo.semmni;
1108 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
1109 	if (error || newp == NULL)
1110 		return error;
1111 
1112 	return semrealloc(newsize, seminfo.semmns, seminfo.semmnu);
1113 }
1114 
1115 static int
1116 sysctl_ipc_semmns(SYSCTLFN_ARGS)
1117 {
1118 	int newsize, error;
1119 	struct sysctlnode node;
1120 	node = *rnode;
1121 	node.sysctl_data = &newsize;
1122 
1123 	newsize = seminfo.semmns;
1124 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
1125 	if (error || newp == NULL)
1126 		return error;
1127 
1128 	return semrealloc(seminfo.semmni, newsize, seminfo.semmnu);
1129 }
1130 
1131 static int
1132 sysctl_ipc_semmnu(SYSCTLFN_ARGS)
1133 {
1134 	int newsize, error;
1135 	struct sysctlnode node;
1136 	node = *rnode;
1137 	node.sysctl_data = &newsize;
1138 
1139 	newsize = seminfo.semmnu;
1140 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
1141 	if (error || newp == NULL)
1142 		return error;
1143 
1144 	return semrealloc(seminfo.semmni, seminfo.semmns, newsize);
1145 }
1146 
1147 SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup")
1148 {
1149 	const struct sysctlnode *node = NULL;
1150 
1151 	sysctl_createv(clog, 0, NULL, NULL,
1152 		CTLFLAG_PERMANENT,
1153 		CTLTYPE_NODE, "kern", NULL,
1154 		NULL, 0, NULL, 0,
1155 		CTL_KERN, CTL_EOL);
1156 	sysctl_createv(clog, 0, NULL, &node,
1157 		CTLFLAG_PERMANENT,
1158 		CTLTYPE_NODE, "ipc",
1159 		SYSCTL_DESCR("SysV IPC options"),
1160 		NULL, 0, NULL, 0,
1161 		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1162 
1163 	if (node == NULL)
1164 		return;
1165 
1166 	sysctl_createv(clog, 0, &node, NULL,
1167 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1168 		CTLTYPE_INT, "semmni",
1169 		SYSCTL_DESCR("Max number of number of semaphore identifiers"),
1170 		sysctl_ipc_semmni, 0, &seminfo.semmni, 0,
1171 		CTL_CREATE, CTL_EOL);
1172 	sysctl_createv(clog, 0, &node, NULL,
1173 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1174 		CTLTYPE_INT, "semmns",
1175 		SYSCTL_DESCR("Max number of number of semaphores in system"),
1176 		sysctl_ipc_semmns, 0, &seminfo.semmns, 0,
1177 		CTL_CREATE, CTL_EOL);
1178 	sysctl_createv(clog, 0, &node, NULL,
1179 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1180 		CTLTYPE_INT, "semmnu",
1181 		SYSCTL_DESCR("Max number of undo structures in system"),
1182 		sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0,
1183 		CTL_CREATE, CTL_EOL);
1184 }
1185