xref: /netbsd-src/sys/kern/uipc_sem.c (revision 369bde8f44daf5ad74687d82554eae9c577ea65d)
1 /*	$NetBSD: uipc_sem.c,v 1.62 2024/12/06 18:44:00 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 /*
59  * Implementation of POSIX semaphore.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.62 2024/12/06 18:44:00 riastradh Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/types.h>
67 
68 #include <sys/atomic.h>
69 #include <sys/cprng.h>
70 #include <sys/fcntl.h>
71 #include <sys/file.h>
72 #include <sys/filedesc.h>
73 #include <sys/kauth.h>
74 #include <sys/kernel.h>
75 #include <sys/kmem.h>
76 #include <sys/ksem.h>
77 #include <sys/lwp.h>
78 #include <sys/module.h>
79 #include <sys/mount.h>
80 #include <sys/mutex.h>
81 #include <sys/proc.h>
82 #include <sys/rwlock.h>
83 #include <sys/sdt.h>
84 #include <sys/semaphore.h>
85 #include <sys/stat.h>
86 #include <sys/syscall.h>
87 #include <sys/syscallargs.h>
88 #include <sys/syscallvar.h>
89 #include <sys/sysctl.h>
90 #include <sys/uidinfo.h>
91 
92 MODULE(MODULE_CLASS_MISC, ksem, NULL);
93 
94 #define	SEM_MAX_NAMELEN		NAME_MAX
95 
96 #define	KS_UNLINKED		0x01
97 
98 static kmutex_t		ksem_lock	__cacheline_aligned;
99 static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
100 static u_int		nsems_total	__cacheline_aligned;
101 static u_int		nsems		__cacheline_aligned;
102 
103 static krwlock_t	ksem_pshared_lock __cacheline_aligned;
104 static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
105 static u_long		ksem_pshared_hashmask __read_mostly;
106 
107 #define	KSEM_PSHARED_HASHSIZE	32
108 
109 static kauth_listener_t	ksem_listener;
110 
111 static int		ksem_sysinit(void);
112 static int		ksem_sysfini(bool);
113 static int		ksem_modcmd(modcmd_t, void *);
114 static void		ksem_release(ksem_t *, int);
115 static int		ksem_close_fop(file_t *);
116 static int		ksem_stat_fop(file_t *, struct stat *);
117 static int		ksem_read_fop(file_t *, off_t *, struct uio *,
118     kauth_cred_t, int);
119 
120 static const struct fileops semops = {
121 	.fo_name = "sem",
122 	.fo_read = ksem_read_fop,
123 	.fo_write = fbadop_write,
124 	.fo_ioctl = fbadop_ioctl,
125 	.fo_fcntl = fnullop_fcntl,
126 	.fo_poll = fnullop_poll,
127 	.fo_stat = ksem_stat_fop,
128 	.fo_close = ksem_close_fop,
129 	.fo_kqfilter = fnullop_kqfilter,
130 	.fo_restart = fnullop_restart,
131 };
132 
133 static const struct syscall_package ksem_syscalls[] = {
134 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
135 	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
136 	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
137 	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
138 	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
139 	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
140 	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
141 	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
142 	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
143 	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
144 	{ 0, 0, NULL },
145 };
146 
147 struct sysctllog *ksem_clog;
148 int ksem_max = KSEM_MAX;
149 
150 static int
151 name_copyin(const char *uname, char **name)
152 {
153 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
154 
155 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
156 	if (error)
157 		kmem_free(*name, SEM_MAX_NAMELEN);
158 
159 	return error;
160 }
161 
162 static void
163 name_destroy(char **name)
164 {
165 	if (!*name)
166 		return;
167 
168 	kmem_free(*name, SEM_MAX_NAMELEN);
169 	*name = NULL;
170 }
171 
172 static int
173 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
174     void *arg0, void *arg1, void *arg2, void *arg3)
175 {
176 	ksem_t *ks;
177 	mode_t mode;
178 
179 	if (action != KAUTH_SYSTEM_SEMAPHORE)
180 		return KAUTH_RESULT_DEFER;
181 
182 	ks = arg1;
183 	mode = ks->ks_mode;
184 
185 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
186 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
187 	    (mode & S_IWOTH) != 0)
188 		return KAUTH_RESULT_ALLOW;
189 
190 	return KAUTH_RESULT_DEFER;
191 }
192 
193 static int
194 ksem_sysinit(void)
195 {
196 	int error;
197 	const struct sysctlnode *rnode;
198 
199 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
200 	LIST_INIT(&ksem_head);
201 	nsems_total = 0;
202 	nsems = 0;
203 
204 	rw_init(&ksem_pshared_lock);
205 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
206 	    true, &ksem_pshared_hashmask);
207 	KASSERT(ksem_pshared_hashtab != NULL);
208 
209 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
210 	    ksem_listener_cb, NULL);
211 
212 	/* Define module-specific sysctl tree */
213 
214 	ksem_clog = NULL;
215 
216 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
217 			CTLFLAG_PERMANENT,
218 			CTLTYPE_NODE, "posix",
219 			SYSCTL_DESCR("POSIX options"),
220 			NULL, 0, NULL, 0,
221 			CTL_KERN, CTL_CREATE, CTL_EOL);
222 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
223 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
224 			CTLTYPE_INT, "semmax",
225 			SYSCTL_DESCR("Maximal number of semaphores"),
226 			NULL, 0, &ksem_max, 0,
227 			CTL_CREATE, CTL_EOL);
228 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
229 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
230 			CTLTYPE_INT, "semcnt",
231 			SYSCTL_DESCR("Current number of semaphores"),
232 			NULL, 0, &nsems, 0,
233 			CTL_CREATE, CTL_EOL);
234 
235 	error = syscall_establish(NULL, ksem_syscalls);
236 	if (error) {
237 		(void)ksem_sysfini(false);
238 	}
239 
240 	return error;
241 }
242 
243 static int
244 ksem_sysfini(bool interface)
245 {
246 	int error;
247 
248 	if (interface) {
249 		error = syscall_disestablish(NULL, ksem_syscalls);
250 		if (error != 0) {
251 			return error;
252 		}
253 		/*
254 		 * Make sure that no semaphores are in use.  Note: semops
255 		 * must be unused at this point.
256 		 */
257 		if (nsems_total) {
258 			error = syscall_establish(NULL, ksem_syscalls);
259 			KASSERT(error == 0);
260 			return SET_ERROR(EBUSY);
261 		}
262 	}
263 	kauth_unlisten_scope(ksem_listener);
264 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
265 	rw_destroy(&ksem_pshared_lock);
266 	mutex_destroy(&ksem_lock);
267 	sysctl_teardown(&ksem_clog);
268 	return 0;
269 }
270 
271 static int
272 ksem_modcmd(modcmd_t cmd, void *arg)
273 {
274 
275 	switch (cmd) {
276 	case MODULE_CMD_INIT:
277 		return ksem_sysinit();
278 
279 	case MODULE_CMD_FINI:
280 		return ksem_sysfini(true);
281 
282 	default:
283 		return SET_ERROR(ENOTTY);
284 	}
285 }
286 
287 static ksem_t *
288 ksem_lookup(const char *name)
289 {
290 	ksem_t *ks;
291 
292 	KASSERT(mutex_owned(&ksem_lock));
293 
294 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
295 		if (strcmp(ks->ks_name, name) == 0) {
296 			mutex_enter(&ks->ks_lock);
297 			return ks;
298 		}
299 	}
300 	return NULL;
301 }
302 
303 static int
304 ksem_perm(lwp_t *l, ksem_t *ks)
305 {
306 	kauth_cred_t uc = l->l_cred;
307 
308 	KASSERT(mutex_owned(&ks->ks_lock));
309 
310 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
311 		return SET_ERROR(EACCES);
312 
313 	return 0;
314 }
315 
/*
 * Map a pshared ID to its hash bucket.  Bits 1..23 of the ID are
 * random, so a few of them are plucked out on the assumption that
 * their distribution is good enough.
 */
#define	KSEM_PSHARED_HASH(id)	(ksem_pshared_hashmask & ((id) >> 1))
321 
322 static void
323 ksem_remove_pshared(ksem_t *ksem)
324 {
325 	rw_enter(&ksem_pshared_lock, RW_WRITER);
326 	LIST_REMOVE(ksem, ks_entry);
327 	rw_exit(&ksem_pshared_lock);
328 }
329 
330 static ksem_t *
331 ksem_lookup_pshared_locked(intptr_t id)
332 {
333 	u_long bucket = KSEM_PSHARED_HASH(id);
334 	ksem_t *ksem = NULL;
335 
336 	/* ksem_t is locked and referenced upon return. */
337 
338 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
339 		if (ksem->ks_pshared_id == id) {
340 			mutex_enter(&ksem->ks_lock);
341 			if (ksem->ks_pshared_proc == NULL) {
342 				/*
343 				 * This entry is dead, and in the process
344 				 * of being torn down; skip it.
345 				 */
346 				mutex_exit(&ksem->ks_lock);
347 				continue;
348 			}
349 			ksem->ks_ref++;
350 			KASSERT(ksem->ks_ref != 0);
351 			return ksem;
352 		}
353 	}
354 
355 	return NULL;
356 }
357 
358 static ksem_t *
359 ksem_lookup_pshared(intptr_t id)
360 {
361 	rw_enter(&ksem_pshared_lock, RW_READER);
362 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
363 	rw_exit(&ksem_pshared_lock);
364 	return ksem;
365 }
366 
367 static void
368 ksem_alloc_pshared_id(ksem_t *ksem)
369 {
370 	ksem_t *ksem0;
371 	uint32_t try;
372 
373 	KASSERT(ksem->ks_pshared_proc != NULL);
374 
375 	rw_enter(&ksem_pshared_lock, RW_WRITER);
376 	for (;;) {
377 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
378 		    KSEM_PSHARED_MARKER;
379 
380 		if ((ksem0 = ksem_lookup_pshared_locked(try)) == NULL) {
381 			/* Got it! */
382 			break;
383 		}
384 		ksem_release(ksem0, -1);
385 	}
386 	ksem->ks_pshared_id = try;
387 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
388 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
389 	rw_exit(&ksem_pshared_lock);
390 }
391 
392 /*
393  * ksem_get: get the semaphore from the descriptor.
394  *
395  * => locks the semaphore, if found, and holds an extra reference.
396  * => holds a reference on the file descriptor.
397  */
398 static int
399 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
400 {
401 	ksem_t *ks;
402 	int fd;
403 
404 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
405 		/*
406 		 * ksem_lookup_pshared() returns the ksem_t *
407 		 * locked and referenced.
408 		 */
409 		ks = ksem_lookup_pshared(id);
410 		if (ks == NULL)
411 			return SET_ERROR(EINVAL);
412 		KASSERT(ks->ks_pshared_id == id);
413 		KASSERT(ks->ks_pshared_proc != NULL);
414 		fd = -1;
415 	} else if (id <= INT_MAX) {
416 		fd = (int)id;
417 		file_t *fp = fd_getfile(fd);
418 
419 		if (__predict_false(fp == NULL))
420 			return SET_ERROR(EINVAL);
421 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
422 			fd_putfile(fd);
423 			return SET_ERROR(EINVAL);
424 		}
425 		ks = fp->f_ksem;
426 		mutex_enter(&ks->ks_lock);
427 		ks->ks_ref++;
428 	} else {
429 		return SET_ERROR(EINVAL);
430 	}
431 
432 	*ksret = ks;
433 	*fdp = fd;
434 	return 0;
435 }
436 
437 /*
438  * ksem_create: allocate and setup a new semaphore structure.
439  */
440 static int
441 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
442 {
443 	ksem_t *ks;
444 	kauth_cred_t uc;
445 	char *kname;
446 	size_t len;
447 
448 	/* Pre-check for the limit. */
449 	if (nsems >= ksem_max) {
450 		return SET_ERROR(ENFILE);
451 	}
452 
453 	if (val > SEM_VALUE_MAX) {
454 		return SET_ERROR(EINVAL);
455 	}
456 
457 	if (name != NULL) {
458 		len = strlen(name);
459 		if (len > SEM_MAX_NAMELEN) {
460 			return SET_ERROR(ENAMETOOLONG);
461 		}
462 		/* Name must start with a '/' but not contain one. */
463 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
464 			return SET_ERROR(EINVAL);
465 		}
466 		kname = kmem_alloc(++len, KM_SLEEP);
467 		strlcpy(kname, name, len);
468 	} else {
469 		kname = NULL;
470 		len = 0;
471 	}
472 
473 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
474 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
475 	cv_init(&ks->ks_cv, "psem");
476 	ks->ks_name = kname;
477 	ks->ks_namelen = len;
478 	ks->ks_mode = mode;
479 	ks->ks_value = val;
480 	ks->ks_ref = 1;
481 
482 	uc = l->l_cred;
483 	ks->ks_uid = kauth_cred_geteuid(uc);
484 	ks->ks_gid = kauth_cred_getegid(uc);
485 	chgsemcnt(ks->ks_uid, 1);
486 	atomic_inc_uint(&nsems_total);
487 
488 	*ksret = ks;
489 	return 0;
490 }
491 
492 static void
493 ksem_free(ksem_t *ks)
494 {
495 
496 	KASSERT(!cv_has_waiters(&ks->ks_cv));
497 
498 	chgsemcnt(ks->ks_uid, -1);
499 	atomic_dec_uint(&nsems_total);
500 
501 	if (ks->ks_pshared_id) {
502 		KASSERT(ks->ks_pshared_proc == NULL);
503 		ksem_remove_pshared(ks);
504 	}
505 	if (ks->ks_name) {
506 		KASSERT(ks->ks_namelen > 0);
507 		kmem_free(ks->ks_name, ks->ks_namelen);
508 	}
509 	mutex_destroy(&ks->ks_lock);
510 	cv_destroy(&ks->ks_cv);
511 	kmem_free(ks, sizeof(ksem_t));
512 }
513 
/* True if the semaphore ID carries the pshared marker. */
#define	KSEM_ID_IS_PSHARED(id)		\
	(KSEM_PSHARED_MARKER == ((id) & KSEM_MARKER_MASK))
516 
517 static void
518 ksem_release(ksem_t *ksem, int fd)
519 {
520 	bool destroy = false;
521 
522 	KASSERT(mutex_owned(&ksem->ks_lock));
523 
524 	KASSERT(ksem->ks_ref > 0);
525 	if (--ksem->ks_ref == 0) {
526 		/*
527 		 * Destroy if the last reference and semaphore is unnamed,
528 		 * or unlinked (for named semaphore).
529 		 */
530 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
531 		    (ksem->ks_name == NULL);
532 	}
533 	mutex_exit(&ksem->ks_lock);
534 
535 	if (destroy) {
536 		ksem_free(ksem);
537 	}
538 	if (fd != -1) {
539 		fd_putfile(fd);
540 	}
541 }
542 
543 int
544 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
545     register_t *retval)
546 {
547 	/* {
548 		unsigned int value;
549 		intptr_t *idp;
550 	} */
551 
552 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
553 	    copyin, copyout);
554 }
555 
556 int
557 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
558     copyout_t docopyout)
559 {
560 	proc_t *p = l->l_proc;
561 	ksem_t *ks;
562 	file_t *fp;
563 	intptr_t id, arg;
564 	int fd, error;
565 
566 	/*
567 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
568 	 * indicate that a pshared semaphore is wanted.  In that case we
569 	 * allocate globally unique ID and return that, rather than the
570 	 * process-scoped file descriptor ID.
571 	 */
572 	error = (*docopyin)(idp, &arg, sizeof(*idp));
573 	if (error) {
574 		return error;
575 	}
576 
577 	error = fd_allocfile(&fp, &fd);
578 	if (error) {
579 		return error;
580 	}
581 	fp->f_type = DTYPE_SEM;
582 	fp->f_flag = FREAD | FWRITE;
583 	fp->f_ops = &semops;
584 
585 	if (fd >= KSEM_MARKER_MIN) {
586 		/*
587 		 * This is super-unlikely, but we check for it anyway
588 		 * because potential collisions with the pshared marker
589 		 * would be bad.
590 		 */
591 		fd_abort(p, fp, fd);
592 		return SET_ERROR(EMFILE);
593 	}
594 
595 	/* Note the mode does not matter for anonymous semaphores. */
596 	error = ksem_create(l, NULL, &ks, 0, val);
597 	if (error) {
598 		fd_abort(p, fp, fd);
599 		return error;
600 	}
601 
602 	if (arg == KSEM_PSHARED) {
603 		ks->ks_pshared_proc = curproc;
604 		ks->ks_pshared_fd = fd;
605 		ksem_alloc_pshared_id(ks);
606 		id = ks->ks_pshared_id;
607 	} else {
608 		id = (intptr_t)fd;
609 	}
610 
611 	error = (*docopyout)(&id, idp, sizeof(*idp));
612 	if (error) {
613 		ksem_free(ks);
614 		fd_abort(p, fp, fd);
615 		return error;
616 	}
617 
618 	fp->f_ksem = ks;
619 	fd_affix(p, fp, fd);
620 	return error;
621 }
622 
623 int
624 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
625     register_t *retval)
626 {
627 	/* {
628 		const char *name;
629 		int oflag;
630 		mode_t mode;
631 		unsigned int value;
632 		intptr_t *idp;
633 	} */
634 
635 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
636 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
637 }
638 
639 int
640 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
641      unsigned int value, intptr_t *idp, copyout_t docopyout)
642 {
643 	char *name;
644 	proc_t *p = l->l_proc;
645 	ksem_t *ksnew = NULL, *ks;
646 	file_t *fp;
647 	intptr_t id;
648 	int fd, error;
649 
650 	error = name_copyin(semname, &name);
651 	if (error) {
652 		return error;
653 	}
654 	error = fd_allocfile(&fp, &fd);
655 	if (error) {
656 		name_destroy(&name);
657 		return error;
658 	}
659 	fp->f_type = DTYPE_SEM;
660 	fp->f_flag = FREAD | FWRITE;
661 	fp->f_ops = &semops;
662 
663 	if (fd >= KSEM_MARKER_MIN) {
664 		/*
665 		 * This is super-unlikely, but we check for it anyway
666 		 * because potential collisions with the pshared marker
667 		 * would be bad.
668 		 */
669 		fd_abort(p, fp, fd);
670 		return SET_ERROR(EMFILE);
671 	}
672 
673 	/*
674 	 * The ID (file descriptor number) can be stored early.
675 	 * Note that zero is a special value for libpthread.
676 	 */
677 	id = (intptr_t)fd;
678 	error = (*docopyout)(&id, idp, sizeof(*idp));
679 	if (error) {
680 		goto err;
681 	}
682 
683 	if (oflag & O_CREAT) {
684 		/* Create a new semaphore. */
685 		error = ksem_create(l, name, &ksnew, mode, value);
686 		if (error) {
687 			goto err;
688 		}
689 		KASSERT(ksnew != NULL);
690 	}
691 
692 	/* Lookup for a semaphore with such name. */
693 	mutex_enter(&ksem_lock);
694 	ks = ksem_lookup(name);
695 	name_destroy(&name);
696 	if (ks) {
697 		KASSERT(mutex_owned(&ks->ks_lock));
698 		mutex_exit(&ksem_lock);
699 
700 		/* Check for exclusive create. */
701 		if (oflag & O_EXCL) {
702 			mutex_exit(&ks->ks_lock);
703 			error = SET_ERROR(EEXIST);
704 			goto err;
705 		}
706 		/*
707 		 * Verify permissions.  If we can access it,
708 		 * add the reference of this thread.
709 		 */
710 		error = ksem_perm(l, ks);
711 		if (error == 0) {
712 			ks->ks_ref++;
713 		}
714 		mutex_exit(&ks->ks_lock);
715 		if (error) {
716 			goto err;
717 		}
718 	} else {
719 		/* Fail if not found and not creating. */
720 		if ((oflag & O_CREAT) == 0) {
721 			mutex_exit(&ksem_lock);
722 			KASSERT(ksnew == NULL);
723 			error = SET_ERROR(ENOENT);
724 			goto err;
725 		}
726 
727 		/* Check for the limit locked. */
728 		if (nsems >= ksem_max) {
729 			mutex_exit(&ksem_lock);
730 			error = SET_ERROR(ENFILE);
731 			goto err;
732 		}
733 
734 		/*
735 		 * Finally, insert semaphore into the list.
736 		 * Note: it already has the initial reference.
737 		 */
738 		ks = ksnew;
739 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
740 		nsems++;
741 		mutex_exit(&ksem_lock);
742 
743 		ksnew = NULL;
744 	}
745 	KASSERT(ks != NULL);
746 	fp->f_ksem = ks;
747 	fd_affix(p, fp, fd);
748 err:
749 	name_destroy(&name);
750 	if (error) {
751 		fd_abort(p, fp, fd);
752 	}
753 	if (ksnew) {
754 		ksem_free(ksnew);
755 	}
756 	return error;
757 }
758 
759 int
760 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
761     register_t *retval)
762 {
763 	/* {
764 		intptr_t id;
765 	} */
766 	intptr_t id = SCARG(uap, id);
767 	int fd, error;
768 	ksem_t *ks;
769 
770 	error = ksem_get(id, &ks, &fd);
771 	if (error) {
772 		return error;
773 	}
774 
775 	/* This is only for named semaphores. */
776 	if (ks->ks_name == NULL) {
777 		error = SET_ERROR(EINVAL);
778 	}
779 	ksem_release(ks, -1);
780 	if (error) {
781 		if (fd != -1)
782 			fd_putfile(fd);
783 		return error;
784 	}
785 	return fd_close(fd);
786 }
787 
788 static int
789 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
790     int flags)
791 {
792 	size_t len;
793 	char *name;
794 	ksem_t *ks = fp->f_ksem;
795 
796 	mutex_enter(&ks->ks_lock);
797 	len = ks->ks_namelen;
798 	name = ks->ks_name;
799 	mutex_exit(&ks->ks_lock);
800 	if (name == NULL || len == 0)
801 		return 0;
802 	return uiomove(name, len, uio);
803 }
804 
805 static int
806 ksem_stat_fop(file_t *fp, struct stat *ub)
807 {
808 	ksem_t *ks = fp->f_ksem;
809 
810 	mutex_enter(&ks->ks_lock);
811 
812 	memset(ub, 0, sizeof(*ub));
813 
814 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
815 	    ? _S_IFLNK : _S_IFREG);
816 	ub->st_uid = ks->ks_uid;
817 	ub->st_gid = ks->ks_gid;
818 	ub->st_size = ks->ks_value;
819 	ub->st_blocks = (ub->st_size) ? 1 : 0;
820 	ub->st_nlink = ks->ks_ref;
821 	ub->st_blksize = 4096;
822 
823 	nanotime(&ub->st_atimespec);
824 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
825 	    ub->st_atimespec;
826 
827 	/*
828 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
829 	 * XXX (st_dev, st_ino) should be unique.
830 	 */
831 	mutex_exit(&ks->ks_lock);
832 	return 0;
833 }
834 
835 static int
836 ksem_close_fop(file_t *fp)
837 {
838 	ksem_t *ks = fp->f_ksem;
839 
840 	mutex_enter(&ks->ks_lock);
841 
842 	if (ks->ks_pshared_id) {
843 		if (ks->ks_pshared_proc != curproc) {
844 			/* Do nothing if this is not the creator. */
845 			mutex_exit(&ks->ks_lock);
846 			return 0;
847 		}
848 		/* Mark this semaphore as dead. */
849 		ks->ks_pshared_proc = NULL;
850 	}
851 
852 	ksem_release(ks, -1);
853 	return 0;
854 }
855 
856 int
857 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
858     register_t *retval)
859 {
860 	/* {
861 		const char *name;
862 	} */
863 	char *name;
864 	ksem_t *ks;
865 	u_int refcnt;
866 	int error;
867 
868 	error = name_copyin(SCARG(uap, name), &name);
869 	if (error)
870 		return error;
871 
872 	mutex_enter(&ksem_lock);
873 	ks = ksem_lookup(name);
874 	name_destroy(&name);
875 	if (ks == NULL) {
876 		mutex_exit(&ksem_lock);
877 		return SET_ERROR(ENOENT);
878 	}
879 	KASSERT(mutex_owned(&ks->ks_lock));
880 
881 	/* Verify permissions. */
882 	error = ksem_perm(l, ks);
883 	if (error) {
884 		mutex_exit(&ks->ks_lock);
885 		mutex_exit(&ksem_lock);
886 		return error;
887 	}
888 
889 	/* Remove from the global list. */
890 	LIST_REMOVE(ks, ks_entry);
891 	nsems--;
892 	mutex_exit(&ksem_lock);
893 
894 	refcnt = ks->ks_ref;
895 	if (refcnt) {
896 		/* Mark as unlinked, if there are references. */
897 		ks->ks_flags |= KS_UNLINKED;
898 	}
899 	mutex_exit(&ks->ks_lock);
900 
901 	if (refcnt == 0) {
902 		ksem_free(ks);
903 	}
904 	return 0;
905 }
906 
907 int
908 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
909     register_t *retval)
910 {
911 	/* {
912 		intptr_t id;
913 	} */
914 	int fd, error;
915 	ksem_t *ks;
916 
917 	error = ksem_get(SCARG(uap, id), &ks, &fd);
918 	if (error) {
919 		return error;
920 	}
921 	KASSERT(mutex_owned(&ks->ks_lock));
922 	if (ks->ks_value == SEM_VALUE_MAX) {
923 		error = SET_ERROR(EOVERFLOW);
924 		goto out;
925 	}
926 	ks->ks_value++;
927 	if (ks->ks_waiters) {
928 		cv_broadcast(&ks->ks_cv);
929 	}
930 out:
931 	ksem_release(ks, fd);
932 	return error;
933 }
934 
935 int
936 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
937 {
938 	int fd, error, timeo;
939 	ksem_t *ks;
940 
941 	error = ksem_get(id, &ks, &fd);
942 	if (error) {
943 		return error;
944 	}
945 	KASSERT(mutex_owned(&ks->ks_lock));
946 	while (ks->ks_value == 0) {
947 		ks->ks_waiters++;
948 		if (!try_p && abstime != NULL) {
949 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
950 			    &timeo, NULL);
951 			if (error != 0)
952 				goto out;
953 		} else {
954 			timeo = 0;
955 		}
956 		error = try_p ? SET_ERROR(EAGAIN) : cv_timedwait_sig(&ks->ks_cv,
957 		    &ks->ks_lock, timeo);
958 		ks->ks_waiters--;
959 		if (error)
960 			goto out;
961 	}
962 	ks->ks_value--;
963 out:
964 	ksem_release(ks, fd);
965 	return error;
966 }
967 
968 int
969 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
970     register_t *retval)
971 {
972 	/* {
973 		intptr_t id;
974 	} */
975 
976 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
977 }
978 
979 int
980 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
981     register_t *retval)
982 {
983 	/* {
984 		intptr_t id;
985 		const struct timespec *abstime;
986 	} */
987 	struct timespec ts;
988 	int error;
989 
990 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
991 	if (error != 0)
992 		return error;
993 
994 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
995 		return SET_ERROR(EINVAL);
996 
997 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
998 	if (error == EWOULDBLOCK)
999 		error = SET_ERROR(ETIMEDOUT);
1000 	return error;
1001 }
1002 
1003 int
1004 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
1005     register_t *retval)
1006 {
1007 	/* {
1008 		intptr_t id;
1009 	} */
1010 
1011 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
1012 }
1013 
1014 int
1015 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
1016     register_t *retval)
1017 {
1018 	/* {
1019 		intptr_t id;
1020 		unsigned int *value;
1021 	} */
1022 	int fd, error;
1023 	ksem_t *ks;
1024 	unsigned int val;
1025 
1026 	error = ksem_get(SCARG(uap, id), &ks, &fd);
1027 	if (error) {
1028 		return error;
1029 	}
1030 	KASSERT(mutex_owned(&ks->ks_lock));
1031 	val = ks->ks_value;
1032 	ksem_release(ks, fd);
1033 
1034 	return copyout(&val, SCARG(uap, value), sizeof(val));
1035 }
1036 
1037 int
1038 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
1039     register_t *retval)
1040 {
1041 	/* {
1042 		intptr_t id;
1043 	} */
1044 	int fd, error;
1045 	ksem_t *ks;
1046 
1047 	intptr_t id = SCARG(uap, id);
1048 
1049 	error = ksem_get(id, &ks, &fd);
1050 	if (error) {
1051 		return error;
1052 	}
1053 	KASSERT(mutex_owned(&ks->ks_lock));
1054 
1055 	/* Operation is only for unnamed semaphores. */
1056 	if (ks->ks_name != NULL) {
1057 		error = SET_ERROR(EINVAL);
1058 		goto out;
1059 	}
1060 	/* Cannot destroy if there are waiters. */
1061 	if (ks->ks_waiters) {
1062 		error = SET_ERROR(EBUSY);
1063 		goto out;
1064 	}
1065 	if (KSEM_ID_IS_PSHARED(id)) {
1066 		/* Cannot destroy if we did't create it. */
1067 		KASSERT(fd == -1);
1068 		KASSERT(ks->ks_pshared_proc != NULL);
1069 		if (ks->ks_pshared_proc != curproc) {
1070 			error = SET_ERROR(EINVAL);
1071 			goto out;
1072 		}
1073 		fd = ks->ks_pshared_fd;
1074 
1075 		/* Mark it dead so subsequent lookups fail. */
1076 		ks->ks_pshared_proc = NULL;
1077 
1078 		/* Do an fd_getfile() to for the benefit of fd_close(). */
1079 		file_t *fp __diagused = fd_getfile(fd);
1080 		KASSERT(fp != NULL);
1081 		KASSERT(fp->f_ksem == ks);
1082 	}
1083 out:
1084 	ksem_release(ks, -1);
1085 	if (error) {
1086 		if (!KSEM_ID_IS_PSHARED(id))
1087 			fd_putfile(fd);
1088 		return error;
1089 	}
1090 	return fd_close(fd);
1091 }
1092