xref: /netbsd-src/sys/kern/uipc_sem.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /*	$NetBSD: uipc_sem.c,v 1.58 2019/12/17 18:16:05 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 /*
59  * Implementation of POSIX semaphore.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.58 2019/12/17 18:16:05 ad Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/kernel.h>
67 
68 #include <sys/atomic.h>
69 #include <sys/proc.h>
70 #include <sys/lwp.h>
71 #include <sys/ksem.h>
72 #include <sys/syscall.h>
73 #include <sys/stat.h>
74 #include <sys/kmem.h>
75 #include <sys/fcntl.h>
76 #include <sys/file.h>
77 #include <sys/filedesc.h>
78 #include <sys/kauth.h>
79 #include <sys/module.h>
80 #include <sys/mount.h>
81 #include <sys/mutex.h>
82 #include <sys/rwlock.h>
83 #include <sys/semaphore.h>
84 #include <sys/syscall.h>
85 #include <sys/syscallargs.h>
86 #include <sys/syscallvar.h>
87 #include <sys/sysctl.h>
88 #include <sys/uidinfo.h>
89 #include <sys/cprng.h>
90 
/* Register this file as the "ksem" kernel module (no dependencies listed). */
MODULE(MODULE_CLASS_MISC, ksem, NULL);

/* Size of the kernel buffer used for copied-in semaphore names. */
#define	SEM_MAX_NAMELEN		NAME_MAX

/* ks_flags bit: the named semaphore has been removed from ksem_head. */
#define	KS_UNLINKED		0x01

/*
 * Named semaphores live on ksem_head; ksem_lock is held around lookups
 * and list/nsems updates (see do_ksem_open() and sys__ksem_unlink()).
 */
static kmutex_t		ksem_lock	__cacheline_aligned;
static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
/* Number of allocated ksem_t: bumped in ksem_create(), dropped in ksem_free(). */
static u_int		nsems_total	__cacheline_aligned;
/* Number of semaphores on ksem_head; exported read-only as sysctl "semcnt". */
static u_int		nsems		__cacheline_aligned;

/*
 * Process-shared semaphores are kept in a hash table indexed by
 * KSEM_PSHARED_HASH(ks_pshared_id), protected by ksem_pshared_lock.
 */
static krwlock_t	ksem_pshared_lock __cacheline_aligned;
static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
static u_long		ksem_pshared_hashmask __read_mostly;

/* Element count passed to hashinit() for the pshared table. */
#define	KSEM_PSHARED_HASHSIZE	32

/* Handle from kauth_listen_scope(); released in ksem_sysfini(). */
static kauth_listener_t	ksem_listener;

/* Module life-cycle hooks and the file operations implemented below. */
static int		ksem_sysinit(void);
static int		ksem_sysfini(bool);
static int		ksem_modcmd(modcmd_t, void *);
static int		ksem_close_fop(file_t *);
static int		ksem_stat_fop(file_t *, struct stat *);
static int		ksem_read_fop(file_t *, off_t *, struct uio *,
    kauth_cred_t, int);
117 
/*
 * File operations installed on semaphore descriptors (DTYPE_SEM).
 * Only read, stat and close are implemented in this file; the other
 * slots use the generic fbadop_ and fnullop_ handlers.
 */
static const struct fileops semops = {
	.fo_name = "sem",
	.fo_read = ksem_read_fop,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = ksem_stat_fop,
	.fo_close = ksem_close_fop,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};
130 
/*
 * System calls provided by this module; handed to syscall_establish()
 * in ksem_sysinit() and to syscall_disestablish() in ksem_sysfini().
 * The final all-zero entry presumably terminates the list.
 */
static const struct syscall_package ksem_syscalls[] = {
	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
	{ 0, 0, NULL },
};
144 
/* sysctl log filled by ksem_sysinit() and torn down by ksem_sysfini(). */
struct sysctllog *ksem_clog;
/* Limit compared against nsems; exported read-write as sysctl "semmax". */
int ksem_max = KSEM_MAX;
147 
148 static int
149 name_copyin(const char *uname, char **name)
150 {
151 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
152 
153 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
154 	if (error)
155 		kmem_free(*name, SEM_MAX_NAMELEN);
156 
157 	return error;
158 }
159 
160 static void
161 name_destroy(char **name)
162 {
163 	if (!*name)
164 		return;
165 
166 	kmem_free(*name, SEM_MAX_NAMELEN);
167 	*name = NULL;
168 }
169 
170 static int
171 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
172     void *arg0, void *arg1, void *arg2, void *arg3)
173 {
174 	ksem_t *ks;
175 	mode_t mode;
176 
177 	if (action != KAUTH_SYSTEM_SEMAPHORE)
178 		return KAUTH_RESULT_DEFER;
179 
180 	ks = arg1;
181 	mode = ks->ks_mode;
182 
183 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
184 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
185 	    (mode & S_IWOTH) != 0)
186 		return KAUTH_RESULT_ALLOW;
187 
188 	return KAUTH_RESULT_DEFER;
189 }
190 
191 static int
192 ksem_sysinit(void)
193 {
194 	int error;
195 	const struct sysctlnode *rnode;
196 
197 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
198 	LIST_INIT(&ksem_head);
199 	nsems_total = 0;
200 	nsems = 0;
201 
202 	rw_init(&ksem_pshared_lock);
203 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
204 	    true, &ksem_pshared_hashmask);
205 	KASSERT(ksem_pshared_hashtab != NULL);
206 
207 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
208 	    ksem_listener_cb, NULL);
209 
210 	/* Define module-specific sysctl tree */
211 
212 	ksem_clog = NULL;
213 
214 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
215 			CTLFLAG_PERMANENT,
216 			CTLTYPE_NODE, "posix",
217 			SYSCTL_DESCR("POSIX options"),
218 			NULL, 0, NULL, 0,
219 			CTL_KERN, CTL_CREATE, CTL_EOL);
220 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
221 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
222 			CTLTYPE_INT, "semmax",
223 			SYSCTL_DESCR("Maximal number of semaphores"),
224 			NULL, 0, &ksem_max, 0,
225 			CTL_CREATE, CTL_EOL);
226 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
227 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
228 			CTLTYPE_INT, "semcnt",
229 			SYSCTL_DESCR("Current number of semaphores"),
230 			NULL, 0, &nsems, 0,
231 			CTL_CREATE, CTL_EOL);
232 
233 	error = syscall_establish(NULL, ksem_syscalls);
234 	if (error) {
235 		(void)ksem_sysfini(false);
236 	}
237 
238 	return error;
239 }
240 
241 static int
242 ksem_sysfini(bool interface)
243 {
244 	int error;
245 
246 	if (interface) {
247 		error = syscall_disestablish(NULL, ksem_syscalls);
248 		if (error != 0) {
249 			return error;
250 		}
251 		/*
252 		 * Make sure that no semaphores are in use.  Note: semops
253 		 * must be unused at this point.
254 		 */
255 		if (nsems_total) {
256 			error = syscall_establish(NULL, ksem_syscalls);
257 			KASSERT(error == 0);
258 			return EBUSY;
259 		}
260 	}
261 	kauth_unlisten_scope(ksem_listener);
262 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
263 	rw_destroy(&ksem_pshared_lock);
264 	mutex_destroy(&ksem_lock);
265 	sysctl_teardown(&ksem_clog);
266 	return 0;
267 }
268 
269 static int
270 ksem_modcmd(modcmd_t cmd, void *arg)
271 {
272 
273 	switch (cmd) {
274 	case MODULE_CMD_INIT:
275 		return ksem_sysinit();
276 
277 	case MODULE_CMD_FINI:
278 		return ksem_sysfini(true);
279 
280 	default:
281 		return ENOTTY;
282 	}
283 }
284 
285 static ksem_t *
286 ksem_lookup(const char *name)
287 {
288 	ksem_t *ks;
289 
290 	KASSERT(mutex_owned(&ksem_lock));
291 
292 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
293 		if (strcmp(ks->ks_name, name) == 0) {
294 			mutex_enter(&ks->ks_lock);
295 			return ks;
296 		}
297 	}
298 	return NULL;
299 }
300 
301 static int
302 ksem_perm(lwp_t *l, ksem_t *ks)
303 {
304 	kauth_cred_t uc = l->l_cred;
305 
306 	KASSERT(mutex_owned(&ks->ks_lock));
307 
308 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
309 		return EACCES;
310 
311 	return 0;
312 }
313 
/*
 * Map a pshared semaphore ID to a bucket index in ksem_pshared_hashtab:
 * the ID is shifted right by one bit and masked with
 * ksem_pshared_hashmask.
 *
 * Bits 1..23 are random, just pluck a few of those and assume the
 * distribution is going to be pretty good.
 */
#define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
319 
320 static void
321 ksem_remove_pshared(ksem_t *ksem)
322 {
323 	rw_enter(&ksem_pshared_lock, RW_WRITER);
324 	LIST_REMOVE(ksem, ks_entry);
325 	rw_exit(&ksem_pshared_lock);
326 }
327 
328 static ksem_t *
329 ksem_lookup_pshared_locked(intptr_t id)
330 {
331 	u_long bucket = KSEM_PSHARED_HASH(id);
332 	ksem_t *ksem = NULL;
333 
334 	/* ksem_t is locked and referenced upon return. */
335 
336 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
337 		if (ksem->ks_pshared_id == id) {
338 			mutex_enter(&ksem->ks_lock);
339 			if (ksem->ks_pshared_proc == NULL) {
340 				/*
341 				 * This entry is dead, and in the process
342 				 * of being torn down; skip it.
343 				 */
344 				mutex_exit(&ksem->ks_lock);
345 				continue;
346 			}
347 			ksem->ks_ref++;
348 			KASSERT(ksem->ks_ref != 0);
349 			return ksem;
350 		}
351 	}
352 
353 	return NULL;
354 }
355 
356 static ksem_t *
357 ksem_lookup_pshared(intptr_t id)
358 {
359 	rw_enter(&ksem_pshared_lock, RW_READER);
360 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
361 	rw_exit(&ksem_pshared_lock);
362 	return ksem;
363 }
364 
365 static void
366 ksem_alloc_pshared_id(ksem_t *ksem)
367 {
368 	uint32_t try;
369 
370 	KASSERT(ksem->ks_pshared_proc != NULL);
371 
372 	rw_enter(&ksem_pshared_lock, RW_WRITER);
373 	for (;;) {
374 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
375 		    KSEM_PSHARED_MARKER;
376 
377 		if (ksem_lookup_pshared_locked(try) == NULL) {
378 			/* Got it! */
379 			break;
380 		}
381 	}
382 	ksem->ks_pshared_id = try;
383 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
384 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
385 	rw_exit(&ksem_pshared_lock);
386 }
387 
388 /*
389  * ksem_get: get the semaphore from the descriptor.
390  *
391  * => locks the semaphore, if found, and holds an extra reference.
392  * => holds a reference on the file descriptor.
393  */
394 static int
395 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
396 {
397 	ksem_t *ks;
398 	int fd;
399 
400 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
401 		/*
402 		 * ksem_lookup_pshared() returns the ksem_t *
403 		 * locked and referenced.
404 		 */
405 		ks = ksem_lookup_pshared(id);
406 		if (ks == NULL)
407 			return EINVAL;
408 		KASSERT(ks->ks_pshared_id == id);
409 		KASSERT(ks->ks_pshared_proc != NULL);
410 		fd = -1;
411 	} else if (id <= INT_MAX) {
412 		fd = (int)id;
413 		file_t *fp = fd_getfile(fd);
414 
415 		if (__predict_false(fp == NULL))
416 			return EINVAL;
417 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
418 			fd_putfile(fd);
419 			return EINVAL;
420 		}
421 		ks = fp->f_ksem;
422 		mutex_enter(&ks->ks_lock);
423 		ks->ks_ref++;
424 	} else {
425 		return EINVAL;
426 	}
427 
428 	*ksret = ks;
429 	*fdp = fd;
430 	return 0;
431 }
432 
433 /*
434  * ksem_create: allocate and setup a new semaphore structure.
435  */
436 static int
437 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
438 {
439 	ksem_t *ks;
440 	kauth_cred_t uc;
441 	char *kname;
442 	size_t len;
443 
444 	/* Pre-check for the limit. */
445 	if (nsems >= ksem_max) {
446 		return ENFILE;
447 	}
448 
449 	if (val > SEM_VALUE_MAX) {
450 		return EINVAL;
451 	}
452 
453 	if (name != NULL) {
454 		len = strlen(name);
455 		if (len > SEM_MAX_NAMELEN) {
456 			return ENAMETOOLONG;
457 		}
458 		/* Name must start with a '/' but not contain one. */
459 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
460 			return EINVAL;
461 		}
462 		kname = kmem_alloc(++len, KM_SLEEP);
463 		strlcpy(kname, name, len);
464 	} else {
465 		kname = NULL;
466 		len = 0;
467 	}
468 
469 	chgsemcnt(kauth_cred_getuid(l->l_cred), 1);
470 
471 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
472 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
473 	cv_init(&ks->ks_cv, "psem");
474 	ks->ks_name = kname;
475 	ks->ks_namelen = len;
476 	ks->ks_mode = mode;
477 	ks->ks_value = val;
478 	ks->ks_ref = 1;
479 
480 	uc = l->l_cred;
481 	ks->ks_uid = kauth_cred_geteuid(uc);
482 	ks->ks_gid = kauth_cred_getegid(uc);
483 
484 	atomic_inc_uint(&nsems_total);
485 	*ksret = ks;
486 	return 0;
487 }
488 
489 static void
490 ksem_free(ksem_t *ks)
491 {
492 
493 	KASSERT(!cv_has_waiters(&ks->ks_cv));
494 
495 	if (ks->ks_pshared_id) {
496 		KASSERT(ks->ks_pshared_proc == NULL);
497 		ksem_remove_pshared(ks);
498 	}
499 	if (ks->ks_name) {
500 		KASSERT(ks->ks_namelen > 0);
501 		kmem_free(ks->ks_name, ks->ks_namelen);
502 	}
503 	mutex_destroy(&ks->ks_lock);
504 	cv_destroy(&ks->ks_cv);
505 	kmem_free(ks, sizeof(ksem_t));
506 
507 	atomic_dec_uint(&nsems_total);
508 	chgsemcnt(kauth_cred_getuid(curproc->p_cred), -1);
509 }
510 
/*
 * True if the semaphore ID carries the process-shared marker bits,
 * i.e. it is a pshared ID rather than a file descriptor number.
 */
#define	KSEM_ID_IS_PSHARED(id)		\
	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
513 
514 static void
515 ksem_release(ksem_t *ksem, int fd)
516 {
517 	bool destroy = false;
518 
519 	KASSERT(mutex_owned(&ksem->ks_lock));
520 
521 	KASSERT(ksem->ks_ref > 0);
522 	if (--ksem->ks_ref == 0) {
523 		/*
524 		 * Destroy if the last reference and semaphore is unnamed,
525 		 * or unlinked (for named semaphore).
526 		 */
527 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
528 		    (ksem->ks_name == NULL);
529 	}
530 	mutex_exit(&ksem->ks_lock);
531 
532 	if (destroy) {
533 		ksem_free(ksem);
534 	}
535 	if (fd != -1) {
536 		fd_putfile(fd);
537 	}
538 }
539 
540 int
541 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
542     register_t *retval)
543 {
544 	/* {
545 		unsigned int value;
546 		intptr_t *idp;
547 	} */
548 
549 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
550 	    copyin, copyout);
551 }
552 
553 int
554 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
555     copyout_t docopyout)
556 {
557 	proc_t *p = l->l_proc;
558 	ksem_t *ks;
559 	file_t *fp;
560 	intptr_t id, arg;
561 	int fd, error;
562 
563 	/*
564 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
565 	 * indicate that a pshared semaphore is wanted.  In that case we
566 	 * allocate globally unique ID and return that, rather than the
567 	 * process-scoped file descriptor ID.
568 	 */
569 	error = (*docopyin)(idp, &arg, sizeof(*idp));
570 	if (error) {
571 		return error;
572 	}
573 
574 	error = fd_allocfile(&fp, &fd);
575 	if (error) {
576 		return error;
577 	}
578 	fp->f_type = DTYPE_SEM;
579 	fp->f_flag = FREAD | FWRITE;
580 	fp->f_ops = &semops;
581 
582 	if (fd >= KSEM_MARKER_MIN) {
583 		/*
584 		 * This is super-unlikely, but we check for it anyway
585 		 * because potential collisions with the pshared marker
586 		 * would be bad.
587 		 */
588 		fd_abort(p, fp, fd);
589 		return EMFILE;
590 	}
591 
592 	/* Note the mode does not matter for anonymous semaphores. */
593 	error = ksem_create(l, NULL, &ks, 0, val);
594 	if (error) {
595 		fd_abort(p, fp, fd);
596 		return error;
597 	}
598 
599 	if (arg == KSEM_PSHARED) {
600 		ks->ks_pshared_proc = curproc;
601 		ks->ks_pshared_fd = fd;
602 		ksem_alloc_pshared_id(ks);
603 		id = ks->ks_pshared_id;
604 	} else {
605 		id = (intptr_t)fd;
606 	}
607 
608 	error = (*docopyout)(&id, idp, sizeof(*idp));
609 	if (error) {
610 		ksem_free(ks);
611 		fd_abort(p, fp, fd);
612 		return error;
613 	}
614 
615 	fp->f_ksem = ks;
616 	fd_affix(p, fp, fd);
617 	return error;
618 }
619 
620 int
621 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
622     register_t *retval)
623 {
624 	/* {
625 		const char *name;
626 		int oflag;
627 		mode_t mode;
628 		unsigned int value;
629 		intptr_t *idp;
630 	} */
631 
632 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
633 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
634 }
635 
636 int
637 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
638      unsigned int value, intptr_t *idp, copyout_t docopyout)
639 {
640 	char *name;
641 	proc_t *p = l->l_proc;
642 	ksem_t *ksnew = NULL, *ks;
643 	file_t *fp;
644 	intptr_t id;
645 	int fd, error;
646 
647 	error = name_copyin(semname, &name);
648 	if (error) {
649 		return error;
650 	}
651 	error = fd_allocfile(&fp, &fd);
652 	if (error) {
653 		name_destroy(&name);
654 		return error;
655 	}
656 	fp->f_type = DTYPE_SEM;
657 	fp->f_flag = FREAD | FWRITE;
658 	fp->f_ops = &semops;
659 
660 	if (fd >= KSEM_MARKER_MIN) {
661 		/*
662 		 * This is super-unlikely, but we check for it anyway
663 		 * because potential collisions with the pshared marker
664 		 * would be bad.
665 		 */
666 		fd_abort(p, fp, fd);
667 		return EMFILE;
668 	}
669 
670 	/*
671 	 * The ID (file descriptor number) can be stored early.
672 	 * Note that zero is a special value for libpthread.
673 	 */
674 	id = (intptr_t)fd;
675 	error = (*docopyout)(&id, idp, sizeof(*idp));
676 	if (error) {
677 		goto err;
678 	}
679 
680 	if (oflag & O_CREAT) {
681 		/* Create a new semaphore. */
682 		error = ksem_create(l, name, &ksnew, mode, value);
683 		if (error) {
684 			goto err;
685 		}
686 		KASSERT(ksnew != NULL);
687 	}
688 
689 	/* Lookup for a semaphore with such name. */
690 	mutex_enter(&ksem_lock);
691 	ks = ksem_lookup(name);
692 	name_destroy(&name);
693 	if (ks) {
694 		KASSERT(mutex_owned(&ks->ks_lock));
695 		mutex_exit(&ksem_lock);
696 
697 		/* Check for exclusive create. */
698 		if (oflag & O_EXCL) {
699 			mutex_exit(&ks->ks_lock);
700 			error = EEXIST;
701 			goto err;
702 		}
703 		/*
704 		 * Verify permissions.  If we can access it,
705 		 * add the reference of this thread.
706 		 */
707 		error = ksem_perm(l, ks);
708 		if (error == 0) {
709 			ks->ks_ref++;
710 		}
711 		mutex_exit(&ks->ks_lock);
712 		if (error) {
713 			goto err;
714 		}
715 	} else {
716 		/* Fail if not found and not creating. */
717 		if ((oflag & O_CREAT) == 0) {
718 			mutex_exit(&ksem_lock);
719 			KASSERT(ksnew == NULL);
720 			error = ENOENT;
721 			goto err;
722 		}
723 
724 		/* Check for the limit locked. */
725 		if (nsems >= ksem_max) {
726 			mutex_exit(&ksem_lock);
727 			error = ENFILE;
728 			goto err;
729 		}
730 
731 		/*
732 		 * Finally, insert semaphore into the list.
733 		 * Note: it already has the initial reference.
734 		 */
735 		ks = ksnew;
736 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
737 		nsems++;
738 		mutex_exit(&ksem_lock);
739 
740 		ksnew = NULL;
741 	}
742 	KASSERT(ks != NULL);
743 	fp->f_ksem = ks;
744 	fd_affix(p, fp, fd);
745 err:
746 	name_destroy(&name);
747 	if (error) {
748 		fd_abort(p, fp, fd);
749 	}
750 	if (ksnew) {
751 		ksem_free(ksnew);
752 	}
753 	return error;
754 }
755 
756 int
757 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
758     register_t *retval)
759 {
760 	/* {
761 		intptr_t id;
762 	} */
763 	intptr_t id = SCARG(uap, id);
764 	int fd, error;
765 	ksem_t *ks;
766 
767 	error = ksem_get(id, &ks, &fd);
768 	if (error) {
769 		return error;
770 	}
771 
772 	/* This is only for named semaphores. */
773 	if (ks->ks_name == NULL) {
774 		error = EINVAL;
775 	}
776 	ksem_release(ks, -1);
777 	if (error) {
778 		if (fd != -1)
779 			fd_putfile(fd);
780 		return error;
781 	}
782 	return fd_close(fd);
783 }
784 
785 static int
786 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
787     int flags)
788 {
789 	size_t len;
790 	char *name;
791 	ksem_t *ks = fp->f_ksem;
792 
793 	mutex_enter(&ks->ks_lock);
794 	len = ks->ks_namelen;
795 	name = ks->ks_name;
796 	mutex_exit(&ks->ks_lock);
797 	if (name == NULL || len == 0)
798 		return 0;
799 	return uiomove(name, len, uio);
800 }
801 
802 static int
803 ksem_stat_fop(file_t *fp, struct stat *ub)
804 {
805 	ksem_t *ks = fp->f_ksem;
806 
807 	mutex_enter(&ks->ks_lock);
808 
809 	memset(ub, 0, sizeof(*ub));
810 
811 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
812 	    ? _S_IFLNK : _S_IFREG);
813 	ub->st_uid = ks->ks_uid;
814 	ub->st_gid = ks->ks_gid;
815 	ub->st_size = ks->ks_value;
816 	ub->st_blocks = (ub->st_size) ? 1 : 0;
817 	ub->st_nlink = ks->ks_ref;
818 	ub->st_blksize = 4096;
819 
820 	nanotime(&ub->st_atimespec);
821 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
822 	    ub->st_atimespec;
823 
824 	/*
825 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
826 	 * XXX (st_dev, st_ino) should be unique.
827 	 */
828 	mutex_exit(&ks->ks_lock);
829 	return 0;
830 }
831 
832 static int
833 ksem_close_fop(file_t *fp)
834 {
835 	ksem_t *ks = fp->f_ksem;
836 
837 	mutex_enter(&ks->ks_lock);
838 
839 	if (ks->ks_pshared_id) {
840 		if (ks->ks_pshared_proc != curproc) {
841 			/* Do nothing if this is not the creator. */
842 			mutex_exit(&ks->ks_lock);
843 			return 0;
844 		}
845 		/* Mark this semaphore as dead. */
846 		ks->ks_pshared_proc = NULL;
847 	}
848 
849 	ksem_release(ks, -1);
850 	return 0;
851 }
852 
853 int
854 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
855     register_t *retval)
856 {
857 	/* {
858 		const char *name;
859 	} */
860 	char *name;
861 	ksem_t *ks;
862 	u_int refcnt;
863 	int error;
864 
865 	error = name_copyin(SCARG(uap, name), &name);
866 	if (error)
867 		return error;
868 
869 	mutex_enter(&ksem_lock);
870 	ks = ksem_lookup(name);
871 	name_destroy(&name);
872 	if (ks == NULL) {
873 		mutex_exit(&ksem_lock);
874 		return ENOENT;
875 	}
876 	KASSERT(mutex_owned(&ks->ks_lock));
877 
878 	/* Verify permissions. */
879 	error = ksem_perm(l, ks);
880 	if (error) {
881 		mutex_exit(&ks->ks_lock);
882 		mutex_exit(&ksem_lock);
883 		return error;
884 	}
885 
886 	/* Remove from the global list. */
887 	LIST_REMOVE(ks, ks_entry);
888 	nsems--;
889 	mutex_exit(&ksem_lock);
890 
891 	refcnt = ks->ks_ref;
892 	if (refcnt) {
893 		/* Mark as unlinked, if there are references. */
894 		ks->ks_flags |= KS_UNLINKED;
895 	}
896 	mutex_exit(&ks->ks_lock);
897 
898 	if (refcnt == 0) {
899 		ksem_free(ks);
900 	}
901 	return 0;
902 }
903 
904 int
905 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
906     register_t *retval)
907 {
908 	/* {
909 		intptr_t id;
910 	} */
911 	int fd, error;
912 	ksem_t *ks;
913 
914 	error = ksem_get(SCARG(uap, id), &ks, &fd);
915 	if (error) {
916 		return error;
917 	}
918 	KASSERT(mutex_owned(&ks->ks_lock));
919 	if (ks->ks_value == SEM_VALUE_MAX) {
920 		error = EOVERFLOW;
921 		goto out;
922 	}
923 	ks->ks_value++;
924 	if (ks->ks_waiters) {
925 		cv_broadcast(&ks->ks_cv);
926 	}
927 out:
928 	ksem_release(ks, fd);
929 	return error;
930 }
931 
932 int
933 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
934 {
935 	int fd, error, timeo;
936 	ksem_t *ks;
937 
938 	error = ksem_get(id, &ks, &fd);
939 	if (error) {
940 		return error;
941 	}
942 	KASSERT(mutex_owned(&ks->ks_lock));
943 	while (ks->ks_value == 0) {
944 		ks->ks_waiters++;
945 		if (!try_p && abstime != NULL) {
946 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
947 			    &timeo, NULL);
948 			if (error != 0)
949 				goto out;
950 		} else {
951 			timeo = 0;
952 		}
953 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
954 		    &ks->ks_lock, timeo);
955 		ks->ks_waiters--;
956 		if (error)
957 			goto out;
958 	}
959 	ks->ks_value--;
960 out:
961 	ksem_release(ks, fd);
962 	return error;
963 }
964 
965 int
966 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
967     register_t *retval)
968 {
969 	/* {
970 		intptr_t id;
971 	} */
972 
973 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
974 }
975 
976 int
977 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
978     register_t *retval)
979 {
980 	/* {
981 		intptr_t id;
982 		const struct timespec *abstime;
983 	} */
984 	struct timespec ts;
985 	int error;
986 
987 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
988 	if (error != 0)
989 		return error;
990 
991 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
992 		return EINVAL;
993 
994 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
995 	if (error == EWOULDBLOCK)
996 		error = ETIMEDOUT;
997 	return error;
998 }
999 
1000 int
1001 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
1002     register_t *retval)
1003 {
1004 	/* {
1005 		intptr_t id;
1006 	} */
1007 
1008 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
1009 }
1010 
1011 int
1012 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
1013     register_t *retval)
1014 {
1015 	/* {
1016 		intptr_t id;
1017 		unsigned int *value;
1018 	} */
1019 	int fd, error;
1020 	ksem_t *ks;
1021 	unsigned int val;
1022 
1023 	error = ksem_get(SCARG(uap, id), &ks, &fd);
1024 	if (error) {
1025 		return error;
1026 	}
1027 	KASSERT(mutex_owned(&ks->ks_lock));
1028 	val = ks->ks_value;
1029 	ksem_release(ks, fd);
1030 
1031 	return copyout(&val, SCARG(uap, value), sizeof(val));
1032 }
1033 
1034 int
1035 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
1036     register_t *retval)
1037 {
1038 	/* {
1039 		intptr_t id;
1040 	} */
1041 	int fd, error;
1042 	ksem_t *ks;
1043 
1044 	intptr_t id = SCARG(uap, id);
1045 
1046 	error = ksem_get(id, &ks, &fd);
1047 	if (error) {
1048 		return error;
1049 	}
1050 	KASSERT(mutex_owned(&ks->ks_lock));
1051 
1052 	/* Operation is only for unnamed semaphores. */
1053 	if (ks->ks_name != NULL) {
1054 		error = EINVAL;
1055 		goto out;
1056 	}
1057 	/* Cannot destroy if there are waiters. */
1058 	if (ks->ks_waiters) {
1059 		error = EBUSY;
1060 		goto out;
1061 	}
1062 	if (KSEM_ID_IS_PSHARED(id)) {
1063 		/* Cannot destroy if we did't create it. */
1064 		KASSERT(fd == -1);
1065 		KASSERT(ks->ks_pshared_proc != NULL);
1066 		if (ks->ks_pshared_proc != curproc) {
1067 			error = EINVAL;
1068 			goto out;
1069 		}
1070 		fd = ks->ks_pshared_fd;
1071 
1072 		/* Mark it dead so subsequent lookups fail. */
1073 		ks->ks_pshared_proc = NULL;
1074 
1075 		/* Do an fd_getfile() to for the benefit of fd_close(). */
1076 		file_t *fp __diagused = fd_getfile(fd);
1077 		KASSERT(fp != NULL);
1078 		KASSERT(fp->f_ksem == ks);
1079 	}
1080 out:
1081 	ksem_release(ks, -1);
1082 	if (error) {
1083 		if (!KSEM_ID_IS_PSHARED(id))
1084 			fd_putfile(fd);
1085 		return error;
1086 	}
1087 	return fd_close(fd);
1088 }
1089