xref: /netbsd-src/sys/kern/uipc_sem.c (revision 2d8e86c2f207da6fbbd50f11b6f33765ebdfa0e9)
1 /*	$NetBSD: uipc_sem.c,v 1.55 2019/03/01 03:03:19 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 /*
59  * Implementation of POSIX semaphore.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.55 2019/03/01 03:03:19 christos Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/kernel.h>
67 
68 #include <sys/atomic.h>
69 #include <sys/proc.h>
70 #include <sys/lwp.h>
71 #include <sys/ksem.h>
72 #include <sys/syscall.h>
73 #include <sys/stat.h>
74 #include <sys/kmem.h>
75 #include <sys/fcntl.h>
76 #include <sys/file.h>
77 #include <sys/filedesc.h>
78 #include <sys/kauth.h>
79 #include <sys/module.h>
80 #include <sys/mount.h>
81 #include <sys/mutex.h>
82 #include <sys/rwlock.h>
83 #include <sys/semaphore.h>
84 #include <sys/syscall.h>
85 #include <sys/syscallargs.h>
86 #include <sys/syscallvar.h>
87 #include <sys/sysctl.h>
88 #include <sys/uidinfo.h>
89 #include <sys/cprng.h>
90 
91 MODULE(MODULE_CLASS_MISC, ksem, NULL);
92 
93 #define	SEM_MAX_NAMELEN		NAME_MAX
94 
95 #define	SEM_NSEMS_MAX		256
96 #define	KS_UNLINKED		0x01
97 
98 static kmutex_t		ksem_lock	__cacheline_aligned;
99 static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
100 static u_int		nsems_total	__cacheline_aligned;
101 static u_int		nsems		__cacheline_aligned;
102 
103 static krwlock_t	ksem_pshared_lock __cacheline_aligned;
104 static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
105 static u_long		ksem_pshared_hashmask __read_mostly;
106 
107 #define	KSEM_PSHARED_HASHSIZE	32
108 
109 static kauth_listener_t	ksem_listener;
110 
111 static int		ksem_sysinit(void);
112 static int		ksem_sysfini(bool);
113 static int		ksem_modcmd(modcmd_t, void *);
114 static int		ksem_close_fop(file_t *);
115 static int		ksem_stat_fop(file_t *, struct stat *);
116 static int		ksem_read_fop(file_t *, off_t *, struct uio *,
117     kauth_cred_t, int);
118 
119 static const struct fileops semops = {
120 	.fo_name = "sem",
121 	.fo_read = ksem_read_fop,
122 	.fo_write = fbadop_write,
123 	.fo_ioctl = fbadop_ioctl,
124 	.fo_fcntl = fnullop_fcntl,
125 	.fo_poll = fnullop_poll,
126 	.fo_stat = ksem_stat_fop,
127 	.fo_close = ksem_close_fop,
128 	.fo_kqfilter = fnullop_kqfilter,
129 	.fo_restart = fnullop_restart,
130 };
131 
132 static const struct syscall_package ksem_syscalls[] = {
133 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
134 	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
135 	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
136 	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
137 	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
138 	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
139 	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
140 	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
141 	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
142 	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
143 	{ 0, 0, NULL },
144 };
145 
146 struct sysctllog *ksem_clog;
147 int ksem_max;
148 
149 static int
150 name_copyin(const char *uname, char **name)
151 {
152 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
153 
154 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
155 	if (error)
156 		kmem_free(*name, SEM_MAX_NAMELEN);
157 
158 	return error;
159 }
160 
161 static void
162 name_destroy(char **name)
163 {
164 	if (!*name)
165 		return;
166 
167 	kmem_free(*name, SEM_MAX_NAMELEN);
168 	*name = NULL;
169 }
170 
171 static int
172 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
173     void *arg0, void *arg1, void *arg2, void *arg3)
174 {
175 	ksem_t *ks;
176 	mode_t mode;
177 
178 	if (action != KAUTH_SYSTEM_SEMAPHORE)
179 		return KAUTH_RESULT_DEFER;
180 
181 	ks = arg1;
182 	mode = ks->ks_mode;
183 
184 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
185 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
186 	    (mode & S_IWOTH) != 0)
187 		return KAUTH_RESULT_ALLOW;
188 
189 	return KAUTH_RESULT_DEFER;
190 }
191 
192 static int
193 ksem_sysinit(void)
194 {
195 	int error;
196 	const struct sysctlnode *rnode;
197 
198 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
199 	LIST_INIT(&ksem_head);
200 	nsems_total = 0;
201 	nsems = 0;
202 
203 	rw_init(&ksem_pshared_lock);
204 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
205 	    true, &ksem_pshared_hashmask);
206 	KASSERT(ksem_pshared_hashtab != NULL);
207 
208 	error = syscall_establish(NULL, ksem_syscalls);
209 	if (error) {
210 		(void)ksem_sysfini(false);
211 	}
212 
213 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
214 	    ksem_listener_cb, NULL);
215 
216 	/* Define module-specific sysctl tree */
217 
218 	ksem_max = KSEM_MAX;
219 	ksem_clog = NULL;
220 
221 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
222 			CTLFLAG_PERMANENT,
223 			CTLTYPE_NODE, "posix",
224 			SYSCTL_DESCR("POSIX options"),
225 			NULL, 0, NULL, 0,
226 			CTL_KERN, CTL_CREATE, CTL_EOL);
227 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
228 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
229 			CTLTYPE_INT, "semmax",
230 			SYSCTL_DESCR("Maximal number of semaphores"),
231 			NULL, 0, &ksem_max, 0,
232 			CTL_CREATE, CTL_EOL);
233 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
234 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
235 			CTLTYPE_INT, "semcnt",
236 			SYSCTL_DESCR("Current number of semaphores"),
237 			NULL, 0, &nsems, 0,
238 			CTL_CREATE, CTL_EOL);
239 
240 	return error;
241 }
242 
243 static int
244 ksem_sysfini(bool interface)
245 {
246 	int error;
247 
248 	if (interface) {
249 		error = syscall_disestablish(NULL, ksem_syscalls);
250 		if (error != 0) {
251 			return error;
252 		}
253 		/*
254 		 * Make sure that no semaphores are in use.  Note: semops
255 		 * must be unused at this point.
256 		 */
257 		if (nsems_total) {
258 			error = syscall_establish(NULL, ksem_syscalls);
259 			KASSERT(error == 0);
260 			return EBUSY;
261 		}
262 	}
263 	kauth_unlisten_scope(ksem_listener);
264 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
265 	rw_destroy(&ksem_pshared_lock);
266 	mutex_destroy(&ksem_lock);
267 	sysctl_teardown(&ksem_clog);
268 	return 0;
269 }
270 
271 static int
272 ksem_modcmd(modcmd_t cmd, void *arg)
273 {
274 
275 	switch (cmd) {
276 	case MODULE_CMD_INIT:
277 		return ksem_sysinit();
278 
279 	case MODULE_CMD_FINI:
280 		return ksem_sysfini(true);
281 
282 	default:
283 		return ENOTTY;
284 	}
285 }
286 
287 static ksem_t *
288 ksem_lookup(const char *name)
289 {
290 	ksem_t *ks;
291 
292 	KASSERT(mutex_owned(&ksem_lock));
293 
294 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
295 		if (strcmp(ks->ks_name, name) == 0) {
296 			mutex_enter(&ks->ks_lock);
297 			return ks;
298 		}
299 	}
300 	return NULL;
301 }
302 
303 static int
304 ksem_perm(lwp_t *l, ksem_t *ks)
305 {
306 	kauth_cred_t uc = l->l_cred;
307 
308 	KASSERT(mutex_owned(&ks->ks_lock));
309 
310 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
311 		return EACCES;
312 
313 	return 0;
314 }
315 
316 /*
317  * Bits 1..23 are random, just pluck a few of those and assume the
318  * distribution is going to be pretty good.
319  */
320 #define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
321 
322 static void
323 ksem_remove_pshared(ksem_t *ksem)
324 {
325 	rw_enter(&ksem_pshared_lock, RW_WRITER);
326 	LIST_REMOVE(ksem, ks_entry);
327 	rw_exit(&ksem_pshared_lock);
328 }
329 
330 static ksem_t *
331 ksem_lookup_pshared_locked(intptr_t id)
332 {
333 	u_long bucket = KSEM_PSHARED_HASH(id);
334 	ksem_t *ksem = NULL;
335 
336 	/* ksem_t is locked and referenced upon return. */
337 
338 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
339 		if (ksem->ks_pshared_id == id) {
340 			mutex_enter(&ksem->ks_lock);
341 			if (ksem->ks_pshared_proc == NULL) {
342 				/*
343 				 * This entry is dead, and in the process
344 				 * of being torn down; skip it.
345 				 */
346 				mutex_exit(&ksem->ks_lock);
347 				continue;
348 			}
349 			ksem->ks_ref++;
350 			KASSERT(ksem->ks_ref != 0);
351 			return ksem;
352 		}
353 	}
354 
355 	return NULL;
356 }
357 
358 static ksem_t *
359 ksem_lookup_pshared(intptr_t id)
360 {
361 	rw_enter(&ksem_pshared_lock, RW_READER);
362 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
363 	rw_exit(&ksem_pshared_lock);
364 	return ksem;
365 }
366 
367 static void
368 ksem_alloc_pshared_id(ksem_t *ksem)
369 {
370 	uint32_t try;
371 
372 	KASSERT(ksem->ks_pshared_proc != NULL);
373 
374 	rw_enter(&ksem_pshared_lock, RW_WRITER);
375 	for (;;) {
376 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
377 		    KSEM_PSHARED_MARKER;
378 
379 		if (ksem_lookup_pshared_locked(try) == NULL) {
380 			/* Got it! */
381 			break;
382 		}
383 	}
384 	ksem->ks_pshared_id = try;
385 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
386 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
387 	rw_exit(&ksem_pshared_lock);
388 }
389 
390 /*
391  * ksem_get: get the semaphore from the descriptor.
392  *
393  * => locks the semaphore, if found, and holds an extra reference.
394  * => holds a reference on the file descriptor.
395  */
396 static int
397 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
398 {
399 	ksem_t *ks;
400 	int fd;
401 
402 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
403 		/*
404 		 * ksem_lookup_pshared() returns the ksem_t *
405 		 * locked and referenced.
406 		 */
407 		ks = ksem_lookup_pshared(id);
408 		if (ks == NULL)
409 			return EINVAL;
410 		KASSERT(ks->ks_pshared_id == id);
411 		KASSERT(ks->ks_pshared_proc != NULL);
412 		fd = -1;
413 	} else if (id <= INT_MAX) {
414 		fd = (int)id;
415 		file_t *fp = fd_getfile(fd);
416 
417 		if (__predict_false(fp == NULL))
418 			return EINVAL;
419 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
420 			fd_putfile(fd);
421 			return EINVAL;
422 		}
423 		ks = fp->f_ksem;
424 		mutex_enter(&ks->ks_lock);
425 		ks->ks_ref++;
426 	} else {
427 		return EINVAL;
428 	}
429 
430 	*ksret = ks;
431 	*fdp = fd;
432 	return 0;
433 }
434 
435 /*
436  * ksem_create: allocate and setup a new semaphore structure.
437  */
438 static int
439 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
440 {
441 	ksem_t *ks;
442 	kauth_cred_t uc;
443 	char *kname;
444 	size_t len;
445 
446 	/* Pre-check for the limit. */
447 	if (nsems >= ksem_max) {
448 		return ENFILE;
449 	}
450 
451 	if (val > SEM_VALUE_MAX) {
452 		return EINVAL;
453 	}
454 
455 	if (name != NULL) {
456 		len = strlen(name);
457 		if (len > SEM_MAX_NAMELEN) {
458 			return ENAMETOOLONG;
459 		}
460 		/* Name must start with a '/' but not contain one. */
461 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
462 			return EINVAL;
463 		}
464 		kname = kmem_alloc(++len, KM_SLEEP);
465 		strlcpy(kname, name, len);
466 	} else {
467 		kname = NULL;
468 		len = 0;
469 	}
470 
471 	u_int cnt;
472 	uid_t uid = kauth_cred_getuid(l->l_cred);
473 	if ((cnt = chgsemcnt(uid, 1)) > SEM_NSEMS_MAX) {
474 		chgsemcnt(uid, -1);
475 		if (kname != NULL)
476 			kmem_free(kname, len);
477 		return ENOSPC;
478 	}
479 
480 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
481 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
482 	cv_init(&ks->ks_cv, "psem");
483 	ks->ks_name = kname;
484 	ks->ks_namelen = len;
485 	ks->ks_mode = mode;
486 	ks->ks_value = val;
487 	ks->ks_ref = 1;
488 
489 	uc = l->l_cred;
490 	ks->ks_uid = kauth_cred_geteuid(uc);
491 	ks->ks_gid = kauth_cred_getegid(uc);
492 
493 	atomic_inc_uint(&nsems_total);
494 	*ksret = ks;
495 	return 0;
496 }
497 
498 static void
499 ksem_free(ksem_t *ks)
500 {
501 
502 	KASSERT(!cv_has_waiters(&ks->ks_cv));
503 
504 	if (ks->ks_pshared_id) {
505 		KASSERT(ks->ks_pshared_proc == NULL);
506 		ksem_remove_pshared(ks);
507 	}
508 	if (ks->ks_name) {
509 		KASSERT(ks->ks_namelen > 0);
510 		kmem_free(ks->ks_name, ks->ks_namelen);
511 	}
512 	mutex_destroy(&ks->ks_lock);
513 	cv_destroy(&ks->ks_cv);
514 	kmem_free(ks, sizeof(ksem_t));
515 
516 	atomic_dec_uint(&nsems_total);
517 	chgsemcnt(kauth_cred_getuid(curproc->p_cred), -1);
518 }
519 
520 #define	KSEM_ID_IS_PSHARED(id)		\
521 	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
522 
523 static void
524 ksem_release(ksem_t *ksem, int fd)
525 {
526 	bool destroy = false;
527 
528 	KASSERT(mutex_owned(&ksem->ks_lock));
529 
530 	KASSERT(ksem->ks_ref > 0);
531 	if (--ksem->ks_ref == 0) {
532 		/*
533 		 * Destroy if the last reference and semaphore is unnamed,
534 		 * or unlinked (for named semaphore).
535 		 */
536 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
537 		    (ksem->ks_name == NULL);
538 	}
539 	mutex_exit(&ksem->ks_lock);
540 
541 	if (destroy) {
542 		ksem_free(ksem);
543 	}
544 	if (fd != -1) {
545 		fd_putfile(fd);
546 	}
547 }
548 
549 int
550 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
551     register_t *retval)
552 {
553 	/* {
554 		unsigned int value;
555 		intptr_t *idp;
556 	} */
557 
558 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
559 	    copyin, copyout);
560 }
561 
562 int
563 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
564     copyout_t docopyout)
565 {
566 	proc_t *p = l->l_proc;
567 	ksem_t *ks;
568 	file_t *fp;
569 	intptr_t id, arg;
570 	int fd, error;
571 
572 	/*
573 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
574 	 * indicate that a pshared semaphore is wanted.  In that case we
575 	 * allocate globally unique ID and return that, rather than the
576 	 * process-scoped file descriptor ID.
577 	 */
578 	error = (*docopyin)(idp, &arg, sizeof(*idp));
579 	if (error) {
580 		return error;
581 	}
582 
583 	error = fd_allocfile(&fp, &fd);
584 	if (error) {
585 		return error;
586 	}
587 	fp->f_type = DTYPE_SEM;
588 	fp->f_flag = FREAD | FWRITE;
589 	fp->f_ops = &semops;
590 
591 	if (fd >= KSEM_MARKER_MIN) {
592 		/*
593 		 * This is super-unlikely, but we check for it anyway
594 		 * because potential collisions with the pshared marker
595 		 * would be bad.
596 		 */
597 		fd_abort(p, fp, fd);
598 		return EMFILE;
599 	}
600 
601 	/* Note the mode does not matter for anonymous semaphores. */
602 	error = ksem_create(l, NULL, &ks, 0, val);
603 	if (error) {
604 		fd_abort(p, fp, fd);
605 		return error;
606 	}
607 
608 	if (arg == KSEM_PSHARED) {
609 		ks->ks_pshared_proc = curproc;
610 		ks->ks_pshared_fd = fd;
611 		ksem_alloc_pshared_id(ks);
612 		id = ks->ks_pshared_id;
613 	} else {
614 		id = (intptr_t)fd;
615 	}
616 
617 	error = (*docopyout)(&id, idp, sizeof(*idp));
618 	if (error) {
619 		ksem_free(ks);
620 		fd_abort(p, fp, fd);
621 		return error;
622 	}
623 
624 	fp->f_ksem = ks;
625 	fd_affix(p, fp, fd);
626 	return error;
627 }
628 
629 int
630 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
631     register_t *retval)
632 {
633 	/* {
634 		const char *name;
635 		int oflag;
636 		mode_t mode;
637 		unsigned int value;
638 		intptr_t *idp;
639 	} */
640 
641 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
642 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
643 }
644 
645 int
646 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
647      unsigned int value, intptr_t *idp, copyout_t docopyout)
648 {
649 	char *name;
650 	proc_t *p = l->l_proc;
651 	ksem_t *ksnew = NULL, *ks;
652 	file_t *fp;
653 	intptr_t id;
654 	int fd, error;
655 
656 	error = name_copyin(semname, &name);
657 	if (error) {
658 		return error;
659 	}
660 	error = fd_allocfile(&fp, &fd);
661 	if (error) {
662 		name_destroy(&name);
663 		return error;
664 	}
665 	fp->f_type = DTYPE_SEM;
666 	fp->f_flag = FREAD | FWRITE;
667 	fp->f_ops = &semops;
668 
669 	if (fd >= KSEM_MARKER_MIN) {
670 		/*
671 		 * This is super-unlikely, but we check for it anyway
672 		 * because potential collisions with the pshared marker
673 		 * would be bad.
674 		 */
675 		fd_abort(p, fp, fd);
676 		return EMFILE;
677 	}
678 
679 	/*
680 	 * The ID (file descriptor number) can be stored early.
681 	 * Note that zero is a special value for libpthread.
682 	 */
683 	id = (intptr_t)fd;
684 	error = (*docopyout)(&id, idp, sizeof(*idp));
685 	if (error) {
686 		goto err;
687 	}
688 
689 	if (oflag & O_CREAT) {
690 		/* Create a new semaphore. */
691 		error = ksem_create(l, name, &ksnew, mode, value);
692 		if (error) {
693 			goto err;
694 		}
695 		KASSERT(ksnew != NULL);
696 	}
697 
698 	/* Lookup for a semaphore with such name. */
699 	mutex_enter(&ksem_lock);
700 	ks = ksem_lookup(name);
701 	name_destroy(&name);
702 	if (ks) {
703 		KASSERT(mutex_owned(&ks->ks_lock));
704 		mutex_exit(&ksem_lock);
705 
706 		/* Check for exclusive create. */
707 		if (oflag & O_EXCL) {
708 			mutex_exit(&ks->ks_lock);
709 			error = EEXIST;
710 			goto err;
711 		}
712 		/*
713 		 * Verify permissions.  If we can access it,
714 		 * add the reference of this thread.
715 		 */
716 		error = ksem_perm(l, ks);
717 		if (error == 0) {
718 			ks->ks_ref++;
719 		}
720 		mutex_exit(&ks->ks_lock);
721 		if (error) {
722 			goto err;
723 		}
724 	} else {
725 		/* Fail if not found and not creating. */
726 		if ((oflag & O_CREAT) == 0) {
727 			mutex_exit(&ksem_lock);
728 			KASSERT(ksnew == NULL);
729 			error = ENOENT;
730 			goto err;
731 		}
732 
733 		/* Check for the limit locked. */
734 		if (nsems >= ksem_max) {
735 			mutex_exit(&ksem_lock);
736 			error = ENFILE;
737 			goto err;
738 		}
739 
740 		/*
741 		 * Finally, insert semaphore into the list.
742 		 * Note: it already has the initial reference.
743 		 */
744 		ks = ksnew;
745 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
746 		nsems++;
747 		mutex_exit(&ksem_lock);
748 
749 		ksnew = NULL;
750 	}
751 	KASSERT(ks != NULL);
752 	fp->f_ksem = ks;
753 	fd_affix(p, fp, fd);
754 err:
755 	name_destroy(&name);
756 	if (error) {
757 		fd_abort(p, fp, fd);
758 	}
759 	if (ksnew) {
760 		ksem_free(ksnew);
761 	}
762 	return error;
763 }
764 
765 int
766 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
767     register_t *retval)
768 {
769 	/* {
770 		intptr_t id;
771 	} */
772 	intptr_t id = SCARG(uap, id);
773 	int fd, error;
774 	ksem_t *ks;
775 
776 	error = ksem_get(id, &ks, &fd);
777 	if (error) {
778 		return error;
779 	}
780 
781 	/* This is only for named semaphores. */
782 	if (ks->ks_name == NULL) {
783 		error = EINVAL;
784 	}
785 	ksem_release(ks, -1);
786 	if (error) {
787 		if (fd != -1)
788 			fd_putfile(fd);
789 		return error;
790 	}
791 	return fd_close(fd);
792 }
793 
794 static int
795 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
796     int flags)
797 {
798 	size_t len;
799 	char *name;
800 	ksem_t *ks = fp->f_ksem;
801 
802 	mutex_enter(&ks->ks_lock);
803 	len = ks->ks_namelen;
804 	name = ks->ks_name;
805 	mutex_exit(&ks->ks_lock);
806 	if (name == NULL || len == 0)
807 		return 0;
808 	return uiomove(name, len, uio);
809 }
810 
811 static int
812 ksem_stat_fop(file_t *fp, struct stat *ub)
813 {
814 	ksem_t *ks = fp->f_ksem;
815 
816 	mutex_enter(&ks->ks_lock);
817 
818 	memset(ub, 0, sizeof(*ub));
819 
820 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
821 	    ? _S_IFLNK : _S_IFREG);
822 	ub->st_uid = ks->ks_uid;
823 	ub->st_gid = ks->ks_gid;
824 	ub->st_size = ks->ks_value;
825 	ub->st_blocks = (ub->st_size) ? 1 : 0;
826 	ub->st_nlink = ks->ks_ref;
827 	ub->st_blksize = 4096;
828 
829 	nanotime(&ub->st_atimespec);
830 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
831 	    ub->st_atimespec;
832 
833 	/*
834 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
835 	 * XXX (st_dev, st_ino) should be unique.
836 	 */
837 	mutex_exit(&ks->ks_lock);
838 	return 0;
839 }
840 
841 static int
842 ksem_close_fop(file_t *fp)
843 {
844 	ksem_t *ks = fp->f_ksem;
845 
846 	mutex_enter(&ks->ks_lock);
847 
848 	if (ks->ks_pshared_id) {
849 		if (ks->ks_pshared_proc != curproc) {
850 			/* Do nothing if this is not the creator. */
851 			mutex_exit(&ks->ks_lock);
852 			return 0;
853 		}
854 		/* Mark this semaphore as dead. */
855 		ks->ks_pshared_proc = NULL;
856 	}
857 
858 	ksem_release(ks, -1);
859 	return 0;
860 }
861 
862 int
863 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
864     register_t *retval)
865 {
866 	/* {
867 		const char *name;
868 	} */
869 	char *name;
870 	ksem_t *ks;
871 	u_int refcnt;
872 	int error;
873 
874 	error = name_copyin(SCARG(uap, name), &name);
875 	if (error)
876 		return error;
877 
878 	mutex_enter(&ksem_lock);
879 	ks = ksem_lookup(name);
880 	name_destroy(&name);
881 	if (ks == NULL) {
882 		mutex_exit(&ksem_lock);
883 		return ENOENT;
884 	}
885 	KASSERT(mutex_owned(&ks->ks_lock));
886 
887 	/* Verify permissions. */
888 	error = ksem_perm(l, ks);
889 	if (error) {
890 		mutex_exit(&ks->ks_lock);
891 		mutex_exit(&ksem_lock);
892 		return error;
893 	}
894 
895 	/* Remove from the global list. */
896 	LIST_REMOVE(ks, ks_entry);
897 	nsems--;
898 	mutex_exit(&ksem_lock);
899 
900 	refcnt = ks->ks_ref;
901 	if (refcnt) {
902 		/* Mark as unlinked, if there are references. */
903 		ks->ks_flags |= KS_UNLINKED;
904 	}
905 	mutex_exit(&ks->ks_lock);
906 
907 	if (refcnt == 0) {
908 		ksem_free(ks);
909 	}
910 	return 0;
911 }
912 
913 int
914 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
915     register_t *retval)
916 {
917 	/* {
918 		intptr_t id;
919 	} */
920 	int fd, error;
921 	ksem_t *ks;
922 
923 	error = ksem_get(SCARG(uap, id), &ks, &fd);
924 	if (error) {
925 		return error;
926 	}
927 	KASSERT(mutex_owned(&ks->ks_lock));
928 	if (ks->ks_value == SEM_VALUE_MAX) {
929 		error = EOVERFLOW;
930 		goto out;
931 	}
932 	ks->ks_value++;
933 	if (ks->ks_waiters) {
934 		cv_broadcast(&ks->ks_cv);
935 	}
936 out:
937 	ksem_release(ks, fd);
938 	return error;
939 }
940 
941 int
942 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
943 {
944 	int fd, error, timeo;
945 	ksem_t *ks;
946 
947 	error = ksem_get(id, &ks, &fd);
948 	if (error) {
949 		return error;
950 	}
951 	KASSERT(mutex_owned(&ks->ks_lock));
952 	while (ks->ks_value == 0) {
953 		ks->ks_waiters++;
954 		if (!try_p && abstime != NULL) {
955 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
956 			    &timeo, NULL);
957 			if (error != 0)
958 				goto out;
959 		} else {
960 			timeo = 0;
961 		}
962 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
963 		    &ks->ks_lock, timeo);
964 		ks->ks_waiters--;
965 		if (error)
966 			goto out;
967 	}
968 	ks->ks_value--;
969 out:
970 	ksem_release(ks, fd);
971 	return error;
972 }
973 
974 int
975 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
976     register_t *retval)
977 {
978 	/* {
979 		intptr_t id;
980 	} */
981 
982 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
983 }
984 
985 int
986 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
987     register_t *retval)
988 {
989 	/* {
990 		intptr_t id;
991 		const struct timespec *abstime;
992 	} */
993 	struct timespec ts;
994 	int error;
995 
996 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
997 	if (error != 0)
998 		return error;
999 
1000 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
1001 		return EINVAL;
1002 
1003 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
1004 	if (error == EWOULDBLOCK)
1005 		error = ETIMEDOUT;
1006 	return error;
1007 }
1008 
1009 int
1010 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
1011     register_t *retval)
1012 {
1013 	/* {
1014 		intptr_t id;
1015 	} */
1016 
1017 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
1018 }
1019 
1020 int
1021 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
1022     register_t *retval)
1023 {
1024 	/* {
1025 		intptr_t id;
1026 		unsigned int *value;
1027 	} */
1028 	int fd, error;
1029 	ksem_t *ks;
1030 	unsigned int val;
1031 
1032 	error = ksem_get(SCARG(uap, id), &ks, &fd);
1033 	if (error) {
1034 		return error;
1035 	}
1036 	KASSERT(mutex_owned(&ks->ks_lock));
1037 	val = ks->ks_value;
1038 	ksem_release(ks, fd);
1039 
1040 	return copyout(&val, SCARG(uap, value), sizeof(val));
1041 }
1042 
1043 int
1044 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
1045     register_t *retval)
1046 {
1047 	/* {
1048 		intptr_t id;
1049 	} */
1050 	int fd, error;
1051 	ksem_t *ks;
1052 
1053 	intptr_t id = SCARG(uap, id);
1054 
1055 	error = ksem_get(id, &ks, &fd);
1056 	if (error) {
1057 		return error;
1058 	}
1059 	KASSERT(mutex_owned(&ks->ks_lock));
1060 
1061 	/* Operation is only for unnamed semaphores. */
1062 	if (ks->ks_name != NULL) {
1063 		error = EINVAL;
1064 		goto out;
1065 	}
1066 	/* Cannot destroy if there are waiters. */
1067 	if (ks->ks_waiters) {
1068 		error = EBUSY;
1069 		goto out;
1070 	}
1071 	if (KSEM_ID_IS_PSHARED(id)) {
1072 		/* Cannot destroy if we did't create it. */
1073 		KASSERT(fd == -1);
1074 		KASSERT(ks->ks_pshared_proc != NULL);
1075 		if (ks->ks_pshared_proc != curproc) {
1076 			error = EINVAL;
1077 			goto out;
1078 		}
1079 		fd = ks->ks_pshared_fd;
1080 
1081 		/* Mark it dead so subsequent lookups fail. */
1082 		ks->ks_pshared_proc = NULL;
1083 
1084 		/* Do an fd_getfile() to for the benefit of fd_close(). */
1085 		file_t *fp __diagused = fd_getfile(fd);
1086 		KASSERT(fp != NULL);
1087 		KASSERT(fp->f_ksem == ks);
1088 	}
1089 out:
1090 	ksem_release(ks, -1);
1091 	if (error) {
1092 		if (!KSEM_ID_IS_PSHARED(id))
1093 			fd_putfile(fd);
1094 		return error;
1095 	}
1096 	return fd_close(fd);
1097 }
1098