xref: /netbsd-src/sys/kern/uipc_sem.c (revision 8ecbf5f02b752fcb7debe1a8fab1dc82602bc760)
1 /*	$NetBSD: uipc_sem.c,v 1.59 2020/05/04 13:58:48 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 /*
59  * Implementation of POSIX semaphore.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.59 2020/05/04 13:58:48 riastradh Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/kernel.h>
67 
68 #include <sys/atomic.h>
69 #include <sys/proc.h>
70 #include <sys/lwp.h>
71 #include <sys/ksem.h>
72 #include <sys/syscall.h>
73 #include <sys/stat.h>
74 #include <sys/kmem.h>
75 #include <sys/fcntl.h>
76 #include <sys/file.h>
77 #include <sys/filedesc.h>
78 #include <sys/kauth.h>
79 #include <sys/module.h>
80 #include <sys/mount.h>
81 #include <sys/mutex.h>
82 #include <sys/rwlock.h>
83 #include <sys/semaphore.h>
84 #include <sys/syscall.h>
85 #include <sys/syscallargs.h>
86 #include <sys/syscallvar.h>
87 #include <sys/sysctl.h>
88 #include <sys/uidinfo.h>
89 #include <sys/cprng.h>
90 
91 MODULE(MODULE_CLASS_MISC, ksem, NULL);
92 
93 #define	SEM_MAX_NAMELEN		NAME_MAX
94 
95 #define	KS_UNLINKED		0x01
96 
97 static kmutex_t		ksem_lock	__cacheline_aligned;
98 static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
99 static u_int		nsems_total	__cacheline_aligned;
100 static u_int		nsems		__cacheline_aligned;
101 
102 static krwlock_t	ksem_pshared_lock __cacheline_aligned;
103 static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
104 static u_long		ksem_pshared_hashmask __read_mostly;
105 
106 #define	KSEM_PSHARED_HASHSIZE	32
107 
108 static kauth_listener_t	ksem_listener;
109 
110 static int		ksem_sysinit(void);
111 static int		ksem_sysfini(bool);
112 static int		ksem_modcmd(modcmd_t, void *);
113 static void		ksem_release(ksem_t *, int);
114 static int		ksem_close_fop(file_t *);
115 static int		ksem_stat_fop(file_t *, struct stat *);
116 static int		ksem_read_fop(file_t *, off_t *, struct uio *,
117     kauth_cred_t, int);
118 
119 static const struct fileops semops = {
120 	.fo_name = "sem",
121 	.fo_read = ksem_read_fop,
122 	.fo_write = fbadop_write,
123 	.fo_ioctl = fbadop_ioctl,
124 	.fo_fcntl = fnullop_fcntl,
125 	.fo_poll = fnullop_poll,
126 	.fo_stat = ksem_stat_fop,
127 	.fo_close = ksem_close_fop,
128 	.fo_kqfilter = fnullop_kqfilter,
129 	.fo_restart = fnullop_restart,
130 };
131 
132 static const struct syscall_package ksem_syscalls[] = {
133 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
134 	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
135 	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
136 	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
137 	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
138 	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
139 	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
140 	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
141 	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
142 	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
143 	{ 0, 0, NULL },
144 };
145 
146 struct sysctllog *ksem_clog;
147 int ksem_max = KSEM_MAX;
148 
149 static int
150 name_copyin(const char *uname, char **name)
151 {
152 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
153 
154 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
155 	if (error)
156 		kmem_free(*name, SEM_MAX_NAMELEN);
157 
158 	return error;
159 }
160 
161 static void
162 name_destroy(char **name)
163 {
164 	if (!*name)
165 		return;
166 
167 	kmem_free(*name, SEM_MAX_NAMELEN);
168 	*name = NULL;
169 }
170 
171 static int
172 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
173     void *arg0, void *arg1, void *arg2, void *arg3)
174 {
175 	ksem_t *ks;
176 	mode_t mode;
177 
178 	if (action != KAUTH_SYSTEM_SEMAPHORE)
179 		return KAUTH_RESULT_DEFER;
180 
181 	ks = arg1;
182 	mode = ks->ks_mode;
183 
184 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
185 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
186 	    (mode & S_IWOTH) != 0)
187 		return KAUTH_RESULT_ALLOW;
188 
189 	return KAUTH_RESULT_DEFER;
190 }
191 
192 static int
193 ksem_sysinit(void)
194 {
195 	int error;
196 	const struct sysctlnode *rnode;
197 
198 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
199 	LIST_INIT(&ksem_head);
200 	nsems_total = 0;
201 	nsems = 0;
202 
203 	rw_init(&ksem_pshared_lock);
204 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
205 	    true, &ksem_pshared_hashmask);
206 	KASSERT(ksem_pshared_hashtab != NULL);
207 
208 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
209 	    ksem_listener_cb, NULL);
210 
211 	/* Define module-specific sysctl tree */
212 
213 	ksem_clog = NULL;
214 
215 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
216 			CTLFLAG_PERMANENT,
217 			CTLTYPE_NODE, "posix",
218 			SYSCTL_DESCR("POSIX options"),
219 			NULL, 0, NULL, 0,
220 			CTL_KERN, CTL_CREATE, CTL_EOL);
221 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
222 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
223 			CTLTYPE_INT, "semmax",
224 			SYSCTL_DESCR("Maximal number of semaphores"),
225 			NULL, 0, &ksem_max, 0,
226 			CTL_CREATE, CTL_EOL);
227 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
228 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
229 			CTLTYPE_INT, "semcnt",
230 			SYSCTL_DESCR("Current number of semaphores"),
231 			NULL, 0, &nsems, 0,
232 			CTL_CREATE, CTL_EOL);
233 
234 	error = syscall_establish(NULL, ksem_syscalls);
235 	if (error) {
236 		(void)ksem_sysfini(false);
237 	}
238 
239 	return error;
240 }
241 
242 static int
243 ksem_sysfini(bool interface)
244 {
245 	int error;
246 
247 	if (interface) {
248 		error = syscall_disestablish(NULL, ksem_syscalls);
249 		if (error != 0) {
250 			return error;
251 		}
252 		/*
253 		 * Make sure that no semaphores are in use.  Note: semops
254 		 * must be unused at this point.
255 		 */
256 		if (nsems_total) {
257 			error = syscall_establish(NULL, ksem_syscalls);
258 			KASSERT(error == 0);
259 			return EBUSY;
260 		}
261 	}
262 	kauth_unlisten_scope(ksem_listener);
263 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
264 	rw_destroy(&ksem_pshared_lock);
265 	mutex_destroy(&ksem_lock);
266 	sysctl_teardown(&ksem_clog);
267 	return 0;
268 }
269 
270 static int
271 ksem_modcmd(modcmd_t cmd, void *arg)
272 {
273 
274 	switch (cmd) {
275 	case MODULE_CMD_INIT:
276 		return ksem_sysinit();
277 
278 	case MODULE_CMD_FINI:
279 		return ksem_sysfini(true);
280 
281 	default:
282 		return ENOTTY;
283 	}
284 }
285 
286 static ksem_t *
287 ksem_lookup(const char *name)
288 {
289 	ksem_t *ks;
290 
291 	KASSERT(mutex_owned(&ksem_lock));
292 
293 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
294 		if (strcmp(ks->ks_name, name) == 0) {
295 			mutex_enter(&ks->ks_lock);
296 			return ks;
297 		}
298 	}
299 	return NULL;
300 }
301 
302 static int
303 ksem_perm(lwp_t *l, ksem_t *ks)
304 {
305 	kauth_cred_t uc = l->l_cred;
306 
307 	KASSERT(mutex_owned(&ks->ks_lock));
308 
309 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
310 		return EACCES;
311 
312 	return 0;
313 }
314 
315 /*
316  * Bits 1..23 are random, just pluck a few of those and assume the
317  * distribution is going to be pretty good.
318  */
319 #define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
320 
321 static void
322 ksem_remove_pshared(ksem_t *ksem)
323 {
324 	rw_enter(&ksem_pshared_lock, RW_WRITER);
325 	LIST_REMOVE(ksem, ks_entry);
326 	rw_exit(&ksem_pshared_lock);
327 }
328 
329 static ksem_t *
330 ksem_lookup_pshared_locked(intptr_t id)
331 {
332 	u_long bucket = KSEM_PSHARED_HASH(id);
333 	ksem_t *ksem = NULL;
334 
335 	/* ksem_t is locked and referenced upon return. */
336 
337 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
338 		if (ksem->ks_pshared_id == id) {
339 			mutex_enter(&ksem->ks_lock);
340 			if (ksem->ks_pshared_proc == NULL) {
341 				/*
342 				 * This entry is dead, and in the process
343 				 * of being torn down; skip it.
344 				 */
345 				mutex_exit(&ksem->ks_lock);
346 				continue;
347 			}
348 			ksem->ks_ref++;
349 			KASSERT(ksem->ks_ref != 0);
350 			return ksem;
351 		}
352 	}
353 
354 	return NULL;
355 }
356 
357 static ksem_t *
358 ksem_lookup_pshared(intptr_t id)
359 {
360 	rw_enter(&ksem_pshared_lock, RW_READER);
361 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
362 	rw_exit(&ksem_pshared_lock);
363 	return ksem;
364 }
365 
366 static void
367 ksem_alloc_pshared_id(ksem_t *ksem)
368 {
369 	ksem_t *ksem0;
370 	uint32_t try;
371 
372 	KASSERT(ksem->ks_pshared_proc != NULL);
373 
374 	rw_enter(&ksem_pshared_lock, RW_WRITER);
375 	for (;;) {
376 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
377 		    KSEM_PSHARED_MARKER;
378 
379 		if ((ksem0 = ksem_lookup_pshared_locked(try)) == NULL) {
380 			/* Got it! */
381 			break;
382 		}
383 		ksem_release(ksem0, -1);
384 	}
385 	ksem->ks_pshared_id = try;
386 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
387 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
388 	rw_exit(&ksem_pshared_lock);
389 }
390 
391 /*
392  * ksem_get: get the semaphore from the descriptor.
393  *
394  * => locks the semaphore, if found, and holds an extra reference.
395  * => holds a reference on the file descriptor.
396  */
397 static int
398 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
399 {
400 	ksem_t *ks;
401 	int fd;
402 
403 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
404 		/*
405 		 * ksem_lookup_pshared() returns the ksem_t *
406 		 * locked and referenced.
407 		 */
408 		ks = ksem_lookup_pshared(id);
409 		if (ks == NULL)
410 			return EINVAL;
411 		KASSERT(ks->ks_pshared_id == id);
412 		KASSERT(ks->ks_pshared_proc != NULL);
413 		fd = -1;
414 	} else if (id <= INT_MAX) {
415 		fd = (int)id;
416 		file_t *fp = fd_getfile(fd);
417 
418 		if (__predict_false(fp == NULL))
419 			return EINVAL;
420 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
421 			fd_putfile(fd);
422 			return EINVAL;
423 		}
424 		ks = fp->f_ksem;
425 		mutex_enter(&ks->ks_lock);
426 		ks->ks_ref++;
427 	} else {
428 		return EINVAL;
429 	}
430 
431 	*ksret = ks;
432 	*fdp = fd;
433 	return 0;
434 }
435 
436 /*
437  * ksem_create: allocate and setup a new semaphore structure.
438  */
439 static int
440 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
441 {
442 	ksem_t *ks;
443 	kauth_cred_t uc;
444 	char *kname;
445 	size_t len;
446 
447 	/* Pre-check for the limit. */
448 	if (nsems >= ksem_max) {
449 		return ENFILE;
450 	}
451 
452 	if (val > SEM_VALUE_MAX) {
453 		return EINVAL;
454 	}
455 
456 	if (name != NULL) {
457 		len = strlen(name);
458 		if (len > SEM_MAX_NAMELEN) {
459 			return ENAMETOOLONG;
460 		}
461 		/* Name must start with a '/' but not contain one. */
462 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
463 			return EINVAL;
464 		}
465 		kname = kmem_alloc(++len, KM_SLEEP);
466 		strlcpy(kname, name, len);
467 	} else {
468 		kname = NULL;
469 		len = 0;
470 	}
471 
472 	chgsemcnt(kauth_cred_getuid(l->l_cred), 1);
473 
474 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
475 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
476 	cv_init(&ks->ks_cv, "psem");
477 	ks->ks_name = kname;
478 	ks->ks_namelen = len;
479 	ks->ks_mode = mode;
480 	ks->ks_value = val;
481 	ks->ks_ref = 1;
482 
483 	uc = l->l_cred;
484 	ks->ks_uid = kauth_cred_geteuid(uc);
485 	ks->ks_gid = kauth_cred_getegid(uc);
486 
487 	atomic_inc_uint(&nsems_total);
488 	*ksret = ks;
489 	return 0;
490 }
491 
492 static void
493 ksem_free(ksem_t *ks)
494 {
495 
496 	KASSERT(!cv_has_waiters(&ks->ks_cv));
497 
498 	if (ks->ks_pshared_id) {
499 		KASSERT(ks->ks_pshared_proc == NULL);
500 		ksem_remove_pshared(ks);
501 	}
502 	if (ks->ks_name) {
503 		KASSERT(ks->ks_namelen > 0);
504 		kmem_free(ks->ks_name, ks->ks_namelen);
505 	}
506 	mutex_destroy(&ks->ks_lock);
507 	cv_destroy(&ks->ks_cv);
508 	kmem_free(ks, sizeof(ksem_t));
509 
510 	atomic_dec_uint(&nsems_total);
511 	chgsemcnt(kauth_cred_getuid(curproc->p_cred), -1);
512 }
513 
514 #define	KSEM_ID_IS_PSHARED(id)		\
515 	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
516 
517 static void
518 ksem_release(ksem_t *ksem, int fd)
519 {
520 	bool destroy = false;
521 
522 	KASSERT(mutex_owned(&ksem->ks_lock));
523 
524 	KASSERT(ksem->ks_ref > 0);
525 	if (--ksem->ks_ref == 0) {
526 		/*
527 		 * Destroy if the last reference and semaphore is unnamed,
528 		 * or unlinked (for named semaphore).
529 		 */
530 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
531 		    (ksem->ks_name == NULL);
532 	}
533 	mutex_exit(&ksem->ks_lock);
534 
535 	if (destroy) {
536 		ksem_free(ksem);
537 	}
538 	if (fd != -1) {
539 		fd_putfile(fd);
540 	}
541 }
542 
543 int
544 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
545     register_t *retval)
546 {
547 	/* {
548 		unsigned int value;
549 		intptr_t *idp;
550 	} */
551 
552 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
553 	    copyin, copyout);
554 }
555 
556 int
557 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
558     copyout_t docopyout)
559 {
560 	proc_t *p = l->l_proc;
561 	ksem_t *ks;
562 	file_t *fp;
563 	intptr_t id, arg;
564 	int fd, error;
565 
566 	/*
567 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
568 	 * indicate that a pshared semaphore is wanted.  In that case we
569 	 * allocate globally unique ID and return that, rather than the
570 	 * process-scoped file descriptor ID.
571 	 */
572 	error = (*docopyin)(idp, &arg, sizeof(*idp));
573 	if (error) {
574 		return error;
575 	}
576 
577 	error = fd_allocfile(&fp, &fd);
578 	if (error) {
579 		return error;
580 	}
581 	fp->f_type = DTYPE_SEM;
582 	fp->f_flag = FREAD | FWRITE;
583 	fp->f_ops = &semops;
584 
585 	if (fd >= KSEM_MARKER_MIN) {
586 		/*
587 		 * This is super-unlikely, but we check for it anyway
588 		 * because potential collisions with the pshared marker
589 		 * would be bad.
590 		 */
591 		fd_abort(p, fp, fd);
592 		return EMFILE;
593 	}
594 
595 	/* Note the mode does not matter for anonymous semaphores. */
596 	error = ksem_create(l, NULL, &ks, 0, val);
597 	if (error) {
598 		fd_abort(p, fp, fd);
599 		return error;
600 	}
601 
602 	if (arg == KSEM_PSHARED) {
603 		ks->ks_pshared_proc = curproc;
604 		ks->ks_pshared_fd = fd;
605 		ksem_alloc_pshared_id(ks);
606 		id = ks->ks_pshared_id;
607 	} else {
608 		id = (intptr_t)fd;
609 	}
610 
611 	error = (*docopyout)(&id, idp, sizeof(*idp));
612 	if (error) {
613 		ksem_free(ks);
614 		fd_abort(p, fp, fd);
615 		return error;
616 	}
617 
618 	fp->f_ksem = ks;
619 	fd_affix(p, fp, fd);
620 	return error;
621 }
622 
623 int
624 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
625     register_t *retval)
626 {
627 	/* {
628 		const char *name;
629 		int oflag;
630 		mode_t mode;
631 		unsigned int value;
632 		intptr_t *idp;
633 	} */
634 
635 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
636 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
637 }
638 
639 int
640 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
641      unsigned int value, intptr_t *idp, copyout_t docopyout)
642 {
643 	char *name;
644 	proc_t *p = l->l_proc;
645 	ksem_t *ksnew = NULL, *ks;
646 	file_t *fp;
647 	intptr_t id;
648 	int fd, error;
649 
650 	error = name_copyin(semname, &name);
651 	if (error) {
652 		return error;
653 	}
654 	error = fd_allocfile(&fp, &fd);
655 	if (error) {
656 		name_destroy(&name);
657 		return error;
658 	}
659 	fp->f_type = DTYPE_SEM;
660 	fp->f_flag = FREAD | FWRITE;
661 	fp->f_ops = &semops;
662 
663 	if (fd >= KSEM_MARKER_MIN) {
664 		/*
665 		 * This is super-unlikely, but we check for it anyway
666 		 * because potential collisions with the pshared marker
667 		 * would be bad.
668 		 */
669 		fd_abort(p, fp, fd);
670 		return EMFILE;
671 	}
672 
673 	/*
674 	 * The ID (file descriptor number) can be stored early.
675 	 * Note that zero is a special value for libpthread.
676 	 */
677 	id = (intptr_t)fd;
678 	error = (*docopyout)(&id, idp, sizeof(*idp));
679 	if (error) {
680 		goto err;
681 	}
682 
683 	if (oflag & O_CREAT) {
684 		/* Create a new semaphore. */
685 		error = ksem_create(l, name, &ksnew, mode, value);
686 		if (error) {
687 			goto err;
688 		}
689 		KASSERT(ksnew != NULL);
690 	}
691 
692 	/* Lookup for a semaphore with such name. */
693 	mutex_enter(&ksem_lock);
694 	ks = ksem_lookup(name);
695 	name_destroy(&name);
696 	if (ks) {
697 		KASSERT(mutex_owned(&ks->ks_lock));
698 		mutex_exit(&ksem_lock);
699 
700 		/* Check for exclusive create. */
701 		if (oflag & O_EXCL) {
702 			mutex_exit(&ks->ks_lock);
703 			error = EEXIST;
704 			goto err;
705 		}
706 		/*
707 		 * Verify permissions.  If we can access it,
708 		 * add the reference of this thread.
709 		 */
710 		error = ksem_perm(l, ks);
711 		if (error == 0) {
712 			ks->ks_ref++;
713 		}
714 		mutex_exit(&ks->ks_lock);
715 		if (error) {
716 			goto err;
717 		}
718 	} else {
719 		/* Fail if not found and not creating. */
720 		if ((oflag & O_CREAT) == 0) {
721 			mutex_exit(&ksem_lock);
722 			KASSERT(ksnew == NULL);
723 			error = ENOENT;
724 			goto err;
725 		}
726 
727 		/* Check for the limit locked. */
728 		if (nsems >= ksem_max) {
729 			mutex_exit(&ksem_lock);
730 			error = ENFILE;
731 			goto err;
732 		}
733 
734 		/*
735 		 * Finally, insert semaphore into the list.
736 		 * Note: it already has the initial reference.
737 		 */
738 		ks = ksnew;
739 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
740 		nsems++;
741 		mutex_exit(&ksem_lock);
742 
743 		ksnew = NULL;
744 	}
745 	KASSERT(ks != NULL);
746 	fp->f_ksem = ks;
747 	fd_affix(p, fp, fd);
748 err:
749 	name_destroy(&name);
750 	if (error) {
751 		fd_abort(p, fp, fd);
752 	}
753 	if (ksnew) {
754 		ksem_free(ksnew);
755 	}
756 	return error;
757 }
758 
759 int
760 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
761     register_t *retval)
762 {
763 	/* {
764 		intptr_t id;
765 	} */
766 	intptr_t id = SCARG(uap, id);
767 	int fd, error;
768 	ksem_t *ks;
769 
770 	error = ksem_get(id, &ks, &fd);
771 	if (error) {
772 		return error;
773 	}
774 
775 	/* This is only for named semaphores. */
776 	if (ks->ks_name == NULL) {
777 		error = EINVAL;
778 	}
779 	ksem_release(ks, -1);
780 	if (error) {
781 		if (fd != -1)
782 			fd_putfile(fd);
783 		return error;
784 	}
785 	return fd_close(fd);
786 }
787 
788 static int
789 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
790     int flags)
791 {
792 	size_t len;
793 	char *name;
794 	ksem_t *ks = fp->f_ksem;
795 
796 	mutex_enter(&ks->ks_lock);
797 	len = ks->ks_namelen;
798 	name = ks->ks_name;
799 	mutex_exit(&ks->ks_lock);
800 	if (name == NULL || len == 0)
801 		return 0;
802 	return uiomove(name, len, uio);
803 }
804 
805 static int
806 ksem_stat_fop(file_t *fp, struct stat *ub)
807 {
808 	ksem_t *ks = fp->f_ksem;
809 
810 	mutex_enter(&ks->ks_lock);
811 
812 	memset(ub, 0, sizeof(*ub));
813 
814 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
815 	    ? _S_IFLNK : _S_IFREG);
816 	ub->st_uid = ks->ks_uid;
817 	ub->st_gid = ks->ks_gid;
818 	ub->st_size = ks->ks_value;
819 	ub->st_blocks = (ub->st_size) ? 1 : 0;
820 	ub->st_nlink = ks->ks_ref;
821 	ub->st_blksize = 4096;
822 
823 	nanotime(&ub->st_atimespec);
824 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
825 	    ub->st_atimespec;
826 
827 	/*
828 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
829 	 * XXX (st_dev, st_ino) should be unique.
830 	 */
831 	mutex_exit(&ks->ks_lock);
832 	return 0;
833 }
834 
835 static int
836 ksem_close_fop(file_t *fp)
837 {
838 	ksem_t *ks = fp->f_ksem;
839 
840 	mutex_enter(&ks->ks_lock);
841 
842 	if (ks->ks_pshared_id) {
843 		if (ks->ks_pshared_proc != curproc) {
844 			/* Do nothing if this is not the creator. */
845 			mutex_exit(&ks->ks_lock);
846 			return 0;
847 		}
848 		/* Mark this semaphore as dead. */
849 		ks->ks_pshared_proc = NULL;
850 	}
851 
852 	ksem_release(ks, -1);
853 	return 0;
854 }
855 
856 int
857 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
858     register_t *retval)
859 {
860 	/* {
861 		const char *name;
862 	} */
863 	char *name;
864 	ksem_t *ks;
865 	u_int refcnt;
866 	int error;
867 
868 	error = name_copyin(SCARG(uap, name), &name);
869 	if (error)
870 		return error;
871 
872 	mutex_enter(&ksem_lock);
873 	ks = ksem_lookup(name);
874 	name_destroy(&name);
875 	if (ks == NULL) {
876 		mutex_exit(&ksem_lock);
877 		return ENOENT;
878 	}
879 	KASSERT(mutex_owned(&ks->ks_lock));
880 
881 	/* Verify permissions. */
882 	error = ksem_perm(l, ks);
883 	if (error) {
884 		mutex_exit(&ks->ks_lock);
885 		mutex_exit(&ksem_lock);
886 		return error;
887 	}
888 
889 	/* Remove from the global list. */
890 	LIST_REMOVE(ks, ks_entry);
891 	nsems--;
892 	mutex_exit(&ksem_lock);
893 
894 	refcnt = ks->ks_ref;
895 	if (refcnt) {
896 		/* Mark as unlinked, if there are references. */
897 		ks->ks_flags |= KS_UNLINKED;
898 	}
899 	mutex_exit(&ks->ks_lock);
900 
901 	if (refcnt == 0) {
902 		ksem_free(ks);
903 	}
904 	return 0;
905 }
906 
907 int
908 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
909     register_t *retval)
910 {
911 	/* {
912 		intptr_t id;
913 	} */
914 	int fd, error;
915 	ksem_t *ks;
916 
917 	error = ksem_get(SCARG(uap, id), &ks, &fd);
918 	if (error) {
919 		return error;
920 	}
921 	KASSERT(mutex_owned(&ks->ks_lock));
922 	if (ks->ks_value == SEM_VALUE_MAX) {
923 		error = EOVERFLOW;
924 		goto out;
925 	}
926 	ks->ks_value++;
927 	if (ks->ks_waiters) {
928 		cv_broadcast(&ks->ks_cv);
929 	}
930 out:
931 	ksem_release(ks, fd);
932 	return error;
933 }
934 
935 int
936 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
937 {
938 	int fd, error, timeo;
939 	ksem_t *ks;
940 
941 	error = ksem_get(id, &ks, &fd);
942 	if (error) {
943 		return error;
944 	}
945 	KASSERT(mutex_owned(&ks->ks_lock));
946 	while (ks->ks_value == 0) {
947 		ks->ks_waiters++;
948 		if (!try_p && abstime != NULL) {
949 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
950 			    &timeo, NULL);
951 			if (error != 0)
952 				goto out;
953 		} else {
954 			timeo = 0;
955 		}
956 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
957 		    &ks->ks_lock, timeo);
958 		ks->ks_waiters--;
959 		if (error)
960 			goto out;
961 	}
962 	ks->ks_value--;
963 out:
964 	ksem_release(ks, fd);
965 	return error;
966 }
967 
968 int
969 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
970     register_t *retval)
971 {
972 	/* {
973 		intptr_t id;
974 	} */
975 
976 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
977 }
978 
979 int
980 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
981     register_t *retval)
982 {
983 	/* {
984 		intptr_t id;
985 		const struct timespec *abstime;
986 	} */
987 	struct timespec ts;
988 	int error;
989 
990 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
991 	if (error != 0)
992 		return error;
993 
994 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
995 		return EINVAL;
996 
997 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
998 	if (error == EWOULDBLOCK)
999 		error = ETIMEDOUT;
1000 	return error;
1001 }
1002 
1003 int
1004 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
1005     register_t *retval)
1006 {
1007 	/* {
1008 		intptr_t id;
1009 	} */
1010 
1011 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
1012 }
1013 
1014 int
1015 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
1016     register_t *retval)
1017 {
1018 	/* {
1019 		intptr_t id;
1020 		unsigned int *value;
1021 	} */
1022 	int fd, error;
1023 	ksem_t *ks;
1024 	unsigned int val;
1025 
1026 	error = ksem_get(SCARG(uap, id), &ks, &fd);
1027 	if (error) {
1028 		return error;
1029 	}
1030 	KASSERT(mutex_owned(&ks->ks_lock));
1031 	val = ks->ks_value;
1032 	ksem_release(ks, fd);
1033 
1034 	return copyout(&val, SCARG(uap, value), sizeof(val));
1035 }
1036 
1037 int
1038 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
1039     register_t *retval)
1040 {
1041 	/* {
1042 		intptr_t id;
1043 	} */
1044 	int fd, error;
1045 	ksem_t *ks;
1046 
1047 	intptr_t id = SCARG(uap, id);
1048 
1049 	error = ksem_get(id, &ks, &fd);
1050 	if (error) {
1051 		return error;
1052 	}
1053 	KASSERT(mutex_owned(&ks->ks_lock));
1054 
1055 	/* Operation is only for unnamed semaphores. */
1056 	if (ks->ks_name != NULL) {
1057 		error = EINVAL;
1058 		goto out;
1059 	}
1060 	/* Cannot destroy if there are waiters. */
1061 	if (ks->ks_waiters) {
1062 		error = EBUSY;
1063 		goto out;
1064 	}
1065 	if (KSEM_ID_IS_PSHARED(id)) {
1066 		/* Cannot destroy if we did't create it. */
1067 		KASSERT(fd == -1);
1068 		KASSERT(ks->ks_pshared_proc != NULL);
1069 		if (ks->ks_pshared_proc != curproc) {
1070 			error = EINVAL;
1071 			goto out;
1072 		}
1073 		fd = ks->ks_pshared_fd;
1074 
1075 		/* Mark it dead so subsequent lookups fail. */
1076 		ks->ks_pshared_proc = NULL;
1077 
1078 		/* Do an fd_getfile() to for the benefit of fd_close(). */
1079 		file_t *fp __diagused = fd_getfile(fd);
1080 		KASSERT(fp != NULL);
1081 		KASSERT(fp->f_ksem == ks);
1082 	}
1083 out:
1084 	ksem_release(ks, -1);
1085 	if (error) {
1086 		if (!KSEM_ID_IS_PSHARED(id))
1087 			fd_putfile(fd);
1088 		return error;
1089 	}
1090 	return fd_close(fd);
1091 }
1092