1 /*	$NetBSD: subr_devsw.c,v 1.48 2022/08/28 12:24:39 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by MAEKAWA Masahide <gehenna@NetBSD.org>, and by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Overview
34  *
35  *	subr_devsw.c: registers device drivers by name and by major
36  *	number, and provides wrapper methods for performing I/O and
37  *	other tasks on device drivers, keying on the device number
38  *	(dev_t).
39  *
40  *	When the system is built, the config(8) command generates
41  *	static tables of device drivers built into the kernel image
42  *	along with their associated methods.  These are recorded in
43  *	the cdevsw0 and bdevsw0 tables.  Drivers can also be added to
44  *	and removed from the system dynamically.
45  *
46  * Allocation
47  *
48  *	When the system initially boots, only the statically allocated
49  *	indexes (bdevsw0, cdevsw0) are used.  If these overflow due to
50  *	allocation, we allocate a fixed block of memory to hold the new,
51  *	expanded index.  This "fork" of the table is only ever performed
52  *	once in order to guarantee that other threads may safely access
53  *	the device tables:
54  *
55  *	o Once a thread has a "reference" to the table via an earlier
56  *	  open() call, we know that the entry in the table must exist
57  *	  and so it is safe to access it.
58  *
59  *	o Regardless of whether other threads see the old or new
60  *	  pointers, they will point to a correct device switch
61  *	  structure for the operation being performed.
62  *
63  *	XXX Currently, the wrapper methods such as cdev_read() verify
64  *	that a device driver does in fact exist before calling the
65  *	associated driver method.  This should be changed so that
66  *	once the device has been referenced by a vnode (opened),
67  *	calling the other methods should be valid until that reference
68  *	is dropped.
69  */
70 
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.48 2022/08/28 12:24:39 riastradh Exp $");
73 
74 #ifdef _KERNEL_OPT
75 #include "opt_dtrace.h"
76 #endif
77 
78 #include <sys/param.h>
79 #include <sys/conf.h>
80 #include <sys/kmem.h>
81 #include <sys/systm.h>
82 #include <sys/poll.h>
83 #include <sys/tty.h>
84 #include <sys/cpu.h>
85 #include <sys/buf.h>
86 #include <sys/reboot.h>
87 #include <sys/sdt.h>
88 #include <sys/atomic.h>
89 #include <sys/localcount.h>
90 #include <sys/pserialize.h>
91 #include <sys/xcall.h>
92 #include <sys/device.h>
93 
94 #ifdef DEVSW_DEBUG
95 #define	DPRINTF(x)	printf x
96 #else /* DEVSW_DEBUG */
97 #define	DPRINTF(x)
98 #endif /* DEVSW_DEBUG */
99 
100 #define	MAXDEVSW	512	/* the maximum number of device majors */
101 #define	BDEVSW_SIZE	(sizeof(struct bdevsw *))
102 #define	CDEVSW_SIZE	(sizeof(struct cdevsw *))
103 #define	DEVSWCONV_SIZE	(sizeof(struct devsw_conv))
104 
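/*
 * Per-major reference counter.  dr_lc is set only for majors attached
 * via devsw_attach(); it counts the references handed out by
 * bdevsw_lookup_acquire()/cdevsw_lookup_acquire() so that
 * devsw_detach() can wait for them to drain.
 */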
105 struct devswref {
106 	struct localcount	*dr_lc;
107 };
108 
109 /* XXX bdevsw, cdevsw, max_bdevsws, and max_cdevsws should be volatile */
110 extern const struct bdevsw **bdevsw, *bdevsw0[];
111 extern const struct cdevsw **cdevsw, *cdevsw0[];
112 extern struct devsw_conv *devsw_conv, devsw_conv0[];
113 extern const int sys_bdevsws, sys_cdevsws;
114 extern int max_bdevsws, max_cdevsws, max_devsw_convs;
115 
116 static struct devswref *cdevswref;
117 static struct devswref *bdevswref;
118 static kcondvar_t devsw_cv;
119 
120 static int bdevsw_attach(const struct bdevsw *, devmajor_t *);
121 static int cdevsw_attach(const struct cdevsw *, devmajor_t *);
122 static void devsw_detach_locked(const struct bdevsw *, const struct cdevsw *);
123 
124 kmutex_t device_lock;
125 
126 void (*biodone_vfs)(buf_t *) = (void *)nullop;
127 
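/*
 * Initialize the lock and condition variable that protect the device
 * switch tables.  Called once, early in kernel bootstrap, before any
 * dynamic devsw_attach().
 */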
128 void
129 devsw_init(void)
130 {
131 
132 	KASSERT(sys_bdevsws < MAXDEVSW - 1);
133 	KASSERT(sys_cdevsws < MAXDEVSW - 1);
134 	mutex_init(&device_lock, MUTEX_DEFAULT, IPL_NONE);
135 
136 	cv_init(&devsw_cv, "devsw");
137 }
138 
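/*
 * Register a driver's block and/or character device switch under the
 * given name.  The character switch is mandatory; bdev may be NULL.
 * Passing NODEVMAJOR through *bmajor/*cmajor asks for a major to be
 * allocated, and the chosen majors are returned through the same
 * pointers.  Fails with EEXIST if a requested major is already in use.
 */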
139 int
140 devsw_attach(const char *devname,
141 	     const struct bdevsw *bdev, devmajor_t *bmajor,
142 	     const struct cdevsw *cdev, devmajor_t *cmajor)
143 {
144 	struct devsw_conv *conv;
145 	char *name;
146 	int error, i;
147 
148 	if (devname == NULL || cdev == NULL)
149 		return EINVAL;
150 
151 	mutex_enter(&device_lock);
152 
153 	for (i = 0; i < max_devsw_convs; i++) {
154 		conv = &devsw_conv[i];
155 		if (conv->d_name == NULL || strcmp(devname, conv->d_name) != 0)
156 			continue;
157 
158 		if (*bmajor < 0)
159 			*bmajor = conv->d_bmajor;
160 		if (*cmajor < 0)
161 			*cmajor = conv->d_cmajor;
162 
163 		if (*bmajor != conv->d_bmajor || *cmajor != conv->d_cmajor) {
164 			error = EINVAL;
165 			goto out;
166 		}
167 		if ((*bmajor >= 0 && bdev == NULL) || *cmajor < 0) {
168 			error = EINVAL;
169 			goto out;
170 		}
171 
172 		if ((*bmajor >= 0 && bdevsw[*bmajor] != NULL) ||
173 		    cdevsw[*cmajor] != NULL) {
174 			error = EEXIST;
175 			goto out;
176 		}
177 		break;
178 	}
179 
180 	/*
181 	 * XXX This should allocate what it needs up front so we never
182 	 * need to flail around trying to unwind.
183 	 */
184 	error = bdevsw_attach(bdev, bmajor);
185 	if (error != 0)
186 		goto out;
187 	error = cdevsw_attach(cdev, cmajor);
188 	if (error != 0) {
189 		devsw_detach_locked(bdev, NULL);
190 		goto out;
191 	}
192 
193 	/*
194 	 * If we already found a conv, we're done.  Otherwise, find an
195 	 * empty slot or extend the table.
196 	 */
197 	if (i < max_devsw_convs) {
198 		error = 0;
199 		goto out;
200 	}
201 
202 	for (i = 0; i < max_devsw_convs; i++) {
203 		if (devsw_conv[i].d_name == NULL)
204 			break;
205 	}
206 	if (i == max_devsw_convs) {
207 		struct devsw_conv *newptr;
208 		int old_convs, new_convs;
209 
210 		old_convs = max_devsw_convs;
211 		new_convs = old_convs + 1;
212 
213 		newptr = kmem_zalloc(new_convs * DEVSWCONV_SIZE, KM_NOSLEEP);
214 		if (newptr == NULL) {
215 			devsw_detach_locked(bdev, cdev);
216 			error = ENOMEM;
217 			goto out;
218 		}
219 		newptr[old_convs].d_name = NULL;
220 		newptr[old_convs].d_bmajor = -1;
221 		newptr[old_convs].d_cmajor = -1;
222 		memcpy(newptr, devsw_conv, old_convs * DEVSWCONV_SIZE);
223 		if (devsw_conv != devsw_conv0)
224 			kmem_free(devsw_conv, old_convs * DEVSWCONV_SIZE);
225 		devsw_conv = newptr;
226 		max_devsw_convs = new_convs;
227 	}
228 
229 	name = kmem_strdupsize(devname, NULL, KM_NOSLEEP);
230 	if (name == NULL) {
231 		devsw_detach_locked(bdev, cdev);
232 		error = ENOMEM;
233 		goto out;
234 	}
235 
236 	devsw_conv[i].d_name = name;
237 	devsw_conv[i].d_bmajor = *bmajor;
238 	devsw_conv[i].d_cmajor = *cmajor;
239 	error = 0;
240 out:
241 	mutex_exit(&device_lock);
242 	return error;
243 }
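
/*
 * Example (illustrative sketch only, not compiled): a hypothetical
 * dynamically loaded driver registering a character device switch.
 * The "example_*" names are made up; a real driver would do this from
 * its module command handler.
 */
#if 0
static dev_type_open(example_open);
static dev_type_close(example_close);
static dev_type_read(example_read);
static dev_type_write(example_write);
static dev_type_ioctl(example_ioctl);

static const struct cdevsw example_cdevsw = {
	.d_open = example_open,
	.d_close = example_close,
	.d_read = example_read,
	.d_write = example_write,
	.d_ioctl = example_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE,
};

static int
example_attach_devsw(void)
{
	devmajor_t bmajor = NODEVMAJOR, cmajor = NODEVMAJOR;

	/* No block device; let the kernel pick the character major. */
	return devsw_attach("example", NULL, &bmajor,
	    &example_cdevsw, &cmajor);
}

static void
example_detach_devsw(void)
{

	devsw_detach(NULL, &example_cdevsw);
}
#endif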
244 
245 static int
246 bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor)
247 {
248 	const struct bdevsw **newbdevsw = NULL;
249 	struct devswref *newbdevswref = NULL;
250 	struct localcount *lc;
251 	devmajor_t bmajor;
252 	int i;
253 
254 	KASSERT(mutex_owned(&device_lock));
255 
256 	if (devsw == NULL)
257 		return 0;
258 
259 	if (*devmajor < 0) {
260 		for (bmajor = sys_bdevsws; bmajor < max_bdevsws; bmajor++) {
261 			if (bdevsw[bmajor] != NULL)
262 				continue;
263 			for (i = 0; i < max_devsw_convs; i++) {
264 				if (devsw_conv[i].d_bmajor == bmajor)
265 					break;
266 			}
267 			if (i != max_devsw_convs)
268 				continue;
269 			break;
270 		}
271 		*devmajor = bmajor;
272 	}
273 
274 	if (*devmajor >= MAXDEVSW) {
275 		printf("%s: block majors exhausted\n", __func__);
276 		return ENOMEM;
277 	}
278 
279 	if (bdevswref == NULL) {
280 		newbdevswref = kmem_zalloc(MAXDEVSW * sizeof(newbdevswref[0]),
281 		    KM_NOSLEEP);
282 		if (newbdevswref == NULL)
283 			return ENOMEM;
284 		atomic_store_release(&bdevswref, newbdevswref);
285 	}
286 
287 	if (*devmajor >= max_bdevsws) {
288 		KASSERT(bdevsw == bdevsw0);
289 		newbdevsw = kmem_zalloc(MAXDEVSW * sizeof(newbdevsw[0]),
290 		    KM_NOSLEEP);
291 		if (newbdevsw == NULL)
292 			return ENOMEM;
293 		memcpy(newbdevsw, bdevsw, max_bdevsws * sizeof(bdevsw[0]));
294 		atomic_store_release(&bdevsw, newbdevsw);
295 		atomic_store_release(&max_bdevsws, MAXDEVSW);
296 	}
297 
298 	if (bdevsw[*devmajor] != NULL)
299 		return EEXIST;
300 
301 	KASSERT(bdevswref[*devmajor].dr_lc == NULL);
302 	lc = kmem_zalloc(sizeof(*lc), KM_SLEEP);
303 	localcount_init(lc);
304 	bdevswref[*devmajor].dr_lc = lc;
305 
306 	atomic_store_release(&bdevsw[*devmajor], devsw);
307 
308 	return 0;
309 }
310 
311 static int
312 cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor)
313 {
314 	const struct cdevsw **newcdevsw = NULL;
315 	struct devswref *newcdevswref = NULL;
316 	struct localcount *lc;
317 	devmajor_t cmajor;
318 	int i;
319 
320 	KASSERT(mutex_owned(&device_lock));
321 
322 	if (*devmajor < 0) {
323 		for (cmajor = sys_cdevsws; cmajor < max_cdevsws; cmajor++) {
324 			if (cdevsw[cmajor] != NULL)
325 				continue;
326 			for (i = 0; i < max_devsw_convs; i++) {
327 				if (devsw_conv[i].d_cmajor == cmajor)
328 					break;
329 			}
330 			if (i != max_devsw_convs)
331 				continue;
332 			break;
333 		}
334 		*devmajor = cmajor;
335 	}
336 
337 	if (*devmajor >= MAXDEVSW) {
338 		printf("%s: character majors exhausted\n", __func__);
339 		return ENOMEM;
340 	}
341 
342 	if (cdevswref == NULL) {
343 		newcdevswref = kmem_zalloc(MAXDEVSW * sizeof(newcdevswref[0]),
344 		    KM_NOSLEEP);
345 		if (newcdevswref == NULL)
346 			return ENOMEM;
347 		atomic_store_release(&cdevswref, newcdevswref);
348 	}
349 
350 	if (*devmajor >= max_cdevsws) {
351 		KASSERT(cdevsw == cdevsw0);
352 		newcdevsw = kmem_zalloc(MAXDEVSW * sizeof(newcdevsw[0]),
353 		    KM_NOSLEEP);
354 		if (newcdevsw == NULL)
355 			return ENOMEM;
356 		memcpy(newcdevsw, cdevsw, max_cdevsws * sizeof(cdevsw[0]));
357 		atomic_store_release(&cdevsw, newcdevsw);
358 		atomic_store_release(&max_cdevsws, MAXDEVSW);
359 	}
360 
361 	if (cdevsw[*devmajor] != NULL)
362 		return EEXIST;
363 
364 	KASSERT(cdevswref[*devmajor].dr_lc == NULL);
365 	lc = kmem_zalloc(sizeof(*lc), KM_SLEEP);
366 	localcount_init(lc);
367 	cdevswref[*devmajor].dr_lc = lc;
368 
369 	atomic_store_release(&cdevsw[*devmajor], devsw);
370 
371 	return 0;
372 }
373 
374 static void
375 devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev)
376 {
377 	int bi, ci = -1/*XXXGCC*/, di;
378 	struct cfdriver *cd;
379 	device_t dv;
380 
381 	KASSERT(mutex_owned(&device_lock));
382 
383 	/*
384 	 * If this is wired to an autoconf device, make sure the device
385 	 * has no more instances.  No locking here because under
386 	 * correct use of devsw_detach, none of this state can change
387 	 * at this point.
388 	 */
389 	if (cdev != NULL && (cd = cdev->d_cfdriver) != NULL) {
390 		for (di = 0; di < cd->cd_ndevs; di++) {
391 			KASSERTMSG((dv = cd->cd_devs[di]) == NULL,
392 			    "detaching character device driver %s"
393 			    " still has attached unit %s",
394 			    cd->cd_name, device_xname(dv));
395 		}
396 	}
397 	if (bdev != NULL && (cd = bdev->d_cfdriver) != NULL) {
398 		for (di = 0; di < cd->cd_ndevs; di++) {
399 			KASSERTMSG((dv = cd->cd_devs[di]) == NULL,
400 			    "detaching block device driver %s"
401 			    " still has attached unit %s",
402 			    cd->cd_name, device_xname(dv));
403 		}
404 	}
405 
406 	/* Prevent new references.  */
407 	if (bdev != NULL) {
408 		for (bi = 0; bi < max_bdevsws; bi++) {
409 			if (bdevsw[bi] != bdev)
410 				continue;
411 			atomic_store_relaxed(&bdevsw[bi], NULL);
412 			break;
413 		}
414 		KASSERT(bi < max_bdevsws);
415 	}
416 	if (cdev != NULL) {
417 		for (ci = 0; ci < max_cdevsws; ci++) {
418 			if (cdevsw[ci] != cdev)
419 				continue;
420 			atomic_store_relaxed(&cdevsw[ci], NULL);
421 			break;
422 		}
423 		KASSERT(ci < max_cdevsws);
424 	}
425 
426 	if (bdev == NULL && cdev == NULL) /* XXX possible? */
427 		return;
428 
429 	/*
430 	 * Wait for all bdevsw_lookup_acquire, cdevsw_lookup_acquire
431 	 * calls to notice that the devsw is gone.
432 	 *
433 	 * XXX Despite the use of the pserialize_read_enter/exit API
434 	 * elsewhere in this file, we use xc_barrier here instead of
435 	 * pserialize_perform -- because devsw_init is too early for
436 	 * pserialize_create.  Either pserialize_create should be made
437 	 * to work earlier, or it should be nixed altogether.  Until
438 	 * that is fixed, xc_barrier will serve the same purpose.
439 	 */
440 	xc_barrier(0);
441 
442 	/*
443 	 * Wait for all references to drain.  It is the caller's
444 	 * responsibility to ensure that at this point, there are no
445 	 * extant open instances and all new d_open calls will fail.
446 	 *
447 	 * Note that localcount_drain may release and reacquire
448 	 * device_lock.
449 	 */
450 	if (bdev != NULL) {
451 		localcount_drain(bdevswref[bi].dr_lc,
452 		    &devsw_cv, &device_lock);
453 		localcount_fini(bdevswref[bi].dr_lc);
454 		kmem_free(bdevswref[bi].dr_lc, sizeof(*bdevswref[bi].dr_lc));
455 		bdevswref[bi].dr_lc = NULL;
456 	}
457 	if (cdev != NULL) {
458 		localcount_drain(cdevswref[ci].dr_lc,
459 		    &devsw_cv, &device_lock);
460 		localcount_fini(cdevswref[ci].dr_lc);
461 		kmem_free(cdevswref[ci].dr_lc, sizeof(*cdevswref[ci].dr_lc));
462 		cdevswref[ci].dr_lc = NULL;
463 	}
464 }
465 
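/*
 * Remove device switches previously registered with devsw_attach().
 * Blocks until all references taken via bdevsw_lookup_acquire() or
 * cdevsw_lookup_acquire() have drained.  The caller must guarantee
 * that no open instances remain and that any new d_open calls fail.
 */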
466 void
467 devsw_detach(const struct bdevsw *bdev, const struct cdevsw *cdev)
468 {
469 
470 	mutex_enter(&device_lock);
471 	devsw_detach_locked(bdev, cdev);
472 	mutex_exit(&device_lock);
473 }
474 
475 /*
476  * Look up a block device by number.
477  *
478  * => Caller must ensure that the device is attached.
479  */
480 const struct bdevsw *
481 bdevsw_lookup(dev_t dev)
482 {
483 	devmajor_t bmajor;
484 
485 	if (dev == NODEV)
486 		return NULL;
487 	bmajor = major(dev);
488 	if (bmajor < 0 || bmajor >= atomic_load_relaxed(&max_bdevsws))
489 		return NULL;
490 
491 	return atomic_load_consume(&bdevsw)[bmajor];
492 }
493 
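/*
 * Look up a block device by number and pin it against detach.
 *
 * => On success, *lcp holds the localcount reference to release with
 *    bdevsw_release(); it may be NULL for majors that were configured
 *    statically, in which case release is a no-op.
 */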
494 static const struct bdevsw *
495 bdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
496 {
497 	devmajor_t bmajor;
498 	const struct bdevsw *bdev = NULL, *const *curbdevsw;
499 	struct devswref *curbdevswref;
500 	int s;
501 
502 	if (dev == NODEV)
503 		return NULL;
504 	bmajor = major(dev);
505 	if (bmajor < 0)
506 		return NULL;
507 
508 	s = pserialize_read_enter();
509 
510 	/*
511 	 * max_bdevsws never goes down, so it is safe to rely on this
512 	 * condition without any locking for the array access below.
513 	 * Test sys_bdevsws first so we can avoid the memory barrier in
514 	 * that case.
515 	 */
516 	if (bmajor >= sys_bdevsws &&
517 	    bmajor >= atomic_load_acquire(&max_bdevsws))
518 		goto out;
519 	curbdevsw = atomic_load_consume(&bdevsw);
520 	if ((bdev = atomic_load_consume(&curbdevsw[bmajor])) == NULL)
521 		goto out;
522 
523 	curbdevswref = atomic_load_consume(&bdevswref);
524 	if (curbdevswref == NULL) {
525 		*lcp = NULL;
526 	} else if ((*lcp = curbdevswref[bmajor].dr_lc) != NULL) {
527 		localcount_acquire(*lcp);
528 	}
529 out:
530 	pserialize_read_exit(s);
531 	return bdev;
532 }
533 
534 static void
535 bdevsw_release(const struct bdevsw *bdev, struct localcount *lc)
536 {
537 
538 	if (lc == NULL)
539 		return;
540 	localcount_release(lc, &devsw_cv, &device_lock);
541 }
542 
543 /*
544  * Look up a character device by number.
545  *
546  * => Caller must ensure that the device is attached.
547  */
548 const struct cdevsw *
549 cdevsw_lookup(dev_t dev)
550 {
551 	devmajor_t cmajor;
552 
553 	if (dev == NODEV)
554 		return NULL;
555 	cmajor = major(dev);
556 	if (cmajor < 0 || cmajor >= atomic_load_relaxed(&max_cdevsws))
557 		return NULL;
558 
559 	return atomic_load_consume(&cdevsw)[cmajor];
560 }
561 
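/*
 * Look up a character device by number and pin it against detach.
 *
 * => On success, *lcp holds the localcount reference to release with
 *    cdevsw_release(); it may be NULL for majors that were configured
 *    statically, in which case release is a no-op.
 */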
562 static const struct cdevsw *
563 cdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
564 {
565 	devmajor_t cmajor;
566 	const struct cdevsw *cdev = NULL, *const *curcdevsw;
567 	struct devswref *curcdevswref;
568 	int s;
569 
570 	if (dev == NODEV)
571 		return NULL;
572 	cmajor = major(dev);
573 	if (cmajor < 0)
574 		return NULL;
575 
576 	s = pserialize_read_enter();
577 
578 	/*
579 	 * max_cdevsws never goes down, so it is safe to rely on this
580 	 * condition without any locking for the array access below.
581 	 * Test sys_cdevsws first so we can avoid the memory barrier in
582 	 * that case.
583 	 */
584 	if (cmajor >= sys_cdevsws &&
585 	    cmajor >= atomic_load_acquire(&max_cdevsws))
586 		goto out;
587 	curcdevsw = atomic_load_consume(&cdevsw);
588 	if ((cdev = atomic_load_consume(&curcdevsw[cmajor])) == NULL)
589 		goto out;
590 
591 	curcdevswref = atomic_load_consume(&cdevswref);
592 	if (curcdevswref == NULL) {
593 		*lcp = NULL;
594 	} else if ((*lcp = curcdevswref[cmajor].dr_lc) != NULL) {
595 		localcount_acquire(*lcp);
596 	}
597 out:
598 	pserialize_read_exit(s);
599 	return cdev;
600 }
601 
602 static void
603 cdevsw_release(const struct cdevsw *cdev, struct localcount *lc)
604 {
605 
606 	if (lc == NULL)
607 		return;
608 	localcount_release(lc, &devsw_cv, &device_lock);
609 }
610 
611 /*
612  * Look up a block device by reference to its operations set.
613  *
614  * => Caller must ensure that the device is not detached, and therefore
615  *    that the returned major is still valid when dereferenced.
616  */
617 devmajor_t
618 bdevsw_lookup_major(const struct bdevsw *bdev)
619 {
620 	const struct bdevsw *const *curbdevsw;
621 	devmajor_t bmajor, bmax;
622 
623 	bmax = atomic_load_acquire(&max_bdevsws);
624 	curbdevsw = atomic_load_consume(&bdevsw);
625 	for (bmajor = 0; bmajor < bmax; bmajor++) {
626 		if (atomic_load_relaxed(&curbdevsw[bmajor]) == bdev)
627 			return bmajor;
628 	}
629 
630 	return NODEVMAJOR;
631 }
632 
633 /*
634  * Look up a character device by reference to its operations set.
635  *
636  * => Caller must ensure that the device is not detached, and therefore
637  *    that the returned major is still valid when dereferenced.
638  */
639 devmajor_t
640 cdevsw_lookup_major(const struct cdevsw *cdev)
641 {
642 	const struct cdevsw *const *curcdevsw;
643 	devmajor_t cmajor, cmax;
644 
645 	cmax = atomic_load_acquire(&max_cdevsws);
646 	curcdevsw = atomic_load_consume(&cdevsw);
647 	for (cmajor = 0; cmajor < cmax; cmajor++) {
648 		if (atomic_load_relaxed(&curcdevsw[cmajor]) == cdev)
649 			return cmajor;
650 	}
651 
652 	return NODEVMAJOR;
653 }
654 
655 /*
656  * Convert from block major number to name.
657  *
658  * => Caller must ensure that the device is not detached, and therefore
659  *    that the name pointer is still valid when dereferenced.
660  */
661 const char *
662 devsw_blk2name(devmajor_t bmajor)
663 {
664 	const char *name;
665 	devmajor_t cmajor;
666 	int i;
667 
668 	name = NULL;
669 	cmajor = -1;
670 
671 	mutex_enter(&device_lock);
672 	if (bmajor < 0 || bmajor >= max_bdevsws || bdevsw[bmajor] == NULL) {
673 		mutex_exit(&device_lock);
674 		return NULL;
675 	}
676 	for (i = 0; i < max_devsw_convs; i++) {
677 		if (devsw_conv[i].d_bmajor == bmajor) {
678 			cmajor = devsw_conv[i].d_cmajor;
679 			break;
680 		}
681 	}
682 	if (cmajor >= 0 && cmajor < max_cdevsws && cdevsw[cmajor] != NULL)
683 		name = devsw_conv[i].d_name;
684 	mutex_exit(&device_lock);
685 
686 	return name;
687 }
688 
689 /*
690  * Convert char major number to device driver name.
691  */
692 const char *
693 cdevsw_getname(devmajor_t major)
694 {
695 	const char *name;
696 	int i;
697 
698 	name = NULL;
699 
700 	if (major < 0)
701 		return NULL;
702 
703 	mutex_enter(&device_lock);
704 	for (i = 0; i < max_devsw_convs; i++) {
705 		if (devsw_conv[i].d_cmajor == major) {
706 			name = devsw_conv[i].d_name;
707 			break;
708 		}
709 	}
710 	mutex_exit(&device_lock);
711 	return name;
712 }
713 
714 /*
715  * Convert block major number to device driver name.
716  */
717 const char *
718 bdevsw_getname(devmajor_t major)
719 {
720 	const char *name;
721 	int i;
722 
723 	name = NULL;
724 
725 	if (major < 0)
726 		return NULL;
727 
728 	mutex_enter(&device_lock);
729 	for (i = 0; i < max_devsw_convs; i++) {
730 		if (devsw_conv[i].d_bmajor == major) {
731 			name = devsw_conv[i].d_name;
732 			break;
733 		}
734 	}
735 	mutex_exit(&device_lock);
736 	return name;
737 }
738 
739 /*
740  * Convert from device name to block major number.
741  *
742  * => Caller must ensure that the device is not detached, and therefore
743  *    that the major number is still valid when dereferenced.
744  */
745 devmajor_t
746 devsw_name2blk(const char *name, char *devname, size_t devnamelen)
747 {
748 	struct devsw_conv *conv;
749 	devmajor_t bmajor;
750 	int i;
751 
752 	if (name == NULL)
753 		return NODEVMAJOR;
754 
755 	mutex_enter(&device_lock);
756 	for (i = 0; i < max_devsw_convs; i++) {
757 		size_t len;
758 
759 		conv = &devsw_conv[i];
760 		if (conv->d_name == NULL)
761 			continue;
762 		len = strlen(conv->d_name);
763 		if (strncmp(conv->d_name, name, len) != 0)
764 			continue;
765 		if (name[len] != '\0' && !isdigit((unsigned char)name[len]))
766 			continue;
767 		bmajor = conv->d_bmajor;
768 		if (bmajor < 0 || bmajor >= max_bdevsws ||
769 		    bdevsw[bmajor] == NULL)
770 			break;
771 		if (devname != NULL) {
772 #ifdef DEVSW_DEBUG
773 			if (strlen(conv->d_name) >= devnamelen)
774 				printf("%s: too short buffer\n", __func__);
775 #endif /* DEVSW_DEBUG */
776 			strncpy(devname, conv->d_name, devnamelen);
777 			devname[devnamelen - 1] = '\0';
778 		}
779 		mutex_exit(&device_lock);
780 		return bmajor;
781 	}
782 
783 	mutex_exit(&device_lock);
784 	return NODEVMAJOR;
785 }
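
/*
 * Example (illustrative sketch only, not compiled): resolving a
 * user-supplied device name such as "wd0" to its block major while
 * recovering the bare driver name.  The names used here are
 * hypothetical.
 */
#if 0
static void
example_name2blk(void)
{
	char drvname[16];
	devmajor_t bmajor;

	bmajor = devsw_name2blk("wd0", drvname, sizeof(drvname));
	if (bmajor != NODEVMAJOR)
		printf("%s: block major %d\n", drvname, bmajor);
}
#endif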
786 
787 /*
788  * Convert from device name to char major number.
789  *
790  * => Caller must ensure that the device is not detached, and therefore
791  *    that the major number is still valid when dereferenced.
792  */
793 devmajor_t
794 devsw_name2chr(const char *name, char *devname, size_t devnamelen)
795 {
796 	struct devsw_conv *conv;
797 	devmajor_t cmajor;
798 	int i;
799 
800 	if (name == NULL)
801 		return NODEVMAJOR;
802 
803 	mutex_enter(&device_lock);
804 	for (i = 0; i < max_devsw_convs; i++) {
805 		size_t len;
806 
807 		conv = &devsw_conv[i];
808 		if (conv->d_name == NULL)
809 			continue;
810 		len = strlen(conv->d_name);
811 		if (strncmp(conv->d_name, name, len) != 0)
812 			continue;
813 		if (name[len] != '\0' && !isdigit((unsigned char)name[len]))
814 			continue;
815 		cmajor = conv->d_cmajor;
816 		if (cmajor < 0 || cmajor >= max_cdevsws ||
817 		    cdevsw[cmajor] == NULL)
818 			break;
819 		if (devname != NULL) {
820 #ifdef DEVSW_DEBUG
821 			if (strlen(conv->d_name) >= devnamelen)
822 				printf("%s: too short buffer\n", __func__);
823 #endif /* DEVSW_DEBUG */
824 			strncpy(devname, conv->d_name, devnamelen);
825 			devname[devnamelen - 1] = '\0';
826 		}
827 		mutex_exit(&device_lock);
828 		return cmajor;
829 	}
830 
831 	mutex_exit(&device_lock);
832 	return NODEVMAJOR;
833 }
834 
835 /*
836  * Convert from character dev_t to block dev_t.
837  *
838  * => Caller must ensure that the device is not detached, and therefore
839  *    that the major number is still valid when dereferenced.
840  */
841 dev_t
842 devsw_chr2blk(dev_t cdev)
843 {
844 	devmajor_t bmajor, cmajor;
845 	int i;
846 	dev_t rv;
847 
848 	cmajor = major(cdev);
849 	bmajor = NODEVMAJOR;
850 	rv = NODEV;
851 
852 	mutex_enter(&device_lock);
853 	if (cmajor < 0 || cmajor >= max_cdevsws || cdevsw[cmajor] == NULL) {
854 		mutex_exit(&device_lock);
855 		return NODEV;
856 	}
857 	for (i = 0; i < max_devsw_convs; i++) {
858 		if (devsw_conv[i].d_cmajor == cmajor) {
859 			bmajor = devsw_conv[i].d_bmajor;
860 			break;
861 		}
862 	}
863 	if (bmajor >= 0 && bmajor < max_bdevsws && bdevsw[bmajor] != NULL)
864 		rv = makedev(bmajor, minor(cdev));
865 	mutex_exit(&device_lock);
866 
867 	return rv;
868 }
869 
870 /*
871  * Convert from block dev_t to character dev_t.
872  *
873  * => Caller must ensure that the device is not detached, and therefore
874  *    that the major number is still valid when dereferenced.
875  */
876 dev_t
877 devsw_blk2chr(dev_t bdev)
878 {
879 	devmajor_t bmajor, cmajor;
880 	int i;
881 	dev_t rv;
882 
883 	bmajor = major(bdev);
884 	cmajor = NODEVMAJOR;
885 	rv = NODEV;
886 
887 	mutex_enter(&device_lock);
888 	if (bmajor < 0 || bmajor >= max_bdevsws || bdevsw[bmajor] == NULL) {
889 		mutex_exit(&device_lock);
890 		return NODEV;
891 	}
892 	for (i = 0; i < max_devsw_convs; i++) {
893 		if (devsw_conv[i].d_bmajor == bmajor) {
894 			cmajor = devsw_conv[i].d_cmajor;
895 			break;
896 		}
897 	}
898 	if (cmajor >= 0 && cmajor < max_cdevsws && cdevsw[cmajor] != NULL)
899 		rv = makedev(cmajor, minor(bdev));
900 	mutex_exit(&device_lock);
901 
902 	return rv;
903 }
904 
905 /*
906  * Device access methods.
907  */
908 
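/*
 * DEV_LOCK/DEV_UNLOCK bracket each call into a driver method: if the
 * driver does not advertise D_MPSAFE in d_flag, the method is called
 * with the kernel lock held; MP-safe drivers are called without it.
 * Both macros rely on a local "int mpflag" in the calling function.
 */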
909 #define	DEV_LOCK(d)						\
910 	if ((mpflag = (d->d_flag & D_MPSAFE)) == 0) {		\
911 		KERNEL_LOCK(1, NULL);				\
912 	}
913 
914 #define	DEV_UNLOCK(d)						\
915 	if (mpflag == 0) {					\
916 		KERNEL_UNLOCK_ONE(NULL);			\
917 	}
918 
919 int
920 bdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
921 {
922 	const struct bdevsw *d;
923 	struct localcount *lc;
924 	device_t dv = NULL/*XXXGCC*/;
925 	int unit, rv, mpflag;
926 
927 	d = bdevsw_lookup_acquire(dev, &lc);
928 	if (d == NULL)
929 		return ENXIO;
930 
931 	if (d->d_devtounit) {
932 		/*
933 		 * If the device node corresponds to an autoconf device
934 		 * instance, acquire a reference to it so that during
935 		 * d_open, device_lookup is stable.
936 		 *
937 		 * XXX This should also arrange to instantiate cloning
938 		 * pseudo-devices if appropriate, but that requires
939 		 * reviewing them all to find and verify a common
940 		 * pattern.
941 		 */
942 		if ((unit = (*d->d_devtounit)(dev)) == -1)
943 			return ENXIO;
944 		if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL)
945 			return ENXIO;
946 	}
947 
948 	DEV_LOCK(d);
949 	rv = (*d->d_open)(dev, flag, devtype, l);
950 	DEV_UNLOCK(d);
951 
952 	if (d->d_devtounit) {
953 		device_release(dv);
954 	}
955 
956 	bdevsw_release(d, lc);
957 
958 	return rv;
959 }
960 
961 int
962 bdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
963 {
964 	const struct bdevsw *d;
965 	int rv, mpflag;
966 
967 	if ((d = bdevsw_lookup(dev)) == NULL)
968 		return ENXIO;
969 	if (d->d_cancel == NULL)
970 		return ENODEV;
971 
972 	DEV_LOCK(d);
973 	rv = (*d->d_cancel)(dev, flag, devtype, l);
974 	DEV_UNLOCK(d);
975 
976 	return rv;
977 }
978 
979 int
980 bdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
981 {
982 	const struct bdevsw *d;
983 	int rv, mpflag;
984 
985 	if ((d = bdevsw_lookup(dev)) == NULL)
986 		return ENXIO;
987 
988 	DEV_LOCK(d);
989 	rv = (*d->d_close)(dev, flag, devtype, l);
990 	DEV_UNLOCK(d);
991 
992 	return rv;
993 }
994 
995 SDT_PROVIDER_DECLARE(io);
996 SDT_PROBE_DEFINE1(io, kernel, , start, "struct buf *"/*bp*/);
997 
998 void
999 bdev_strategy(struct buf *bp)
1000 {
1001 	const struct bdevsw *d;
1002 	int mpflag;
1003 
1004 	SDT_PROBE1(io, kernel, , start, bp);
1005 
1006 	if ((d = bdevsw_lookup(bp->b_dev)) == NULL) {
1007 		bp->b_error = ENXIO;
1008 		bp->b_resid = bp->b_bcount;
1009 		biodone_vfs(bp); /* biodone() iff vfs present */
1010 		return;
1011 	}
1012 
1013 	DEV_LOCK(d);
1014 	(*d->d_strategy)(bp);
1015 	DEV_UNLOCK(d);
1016 }
1017 
1018 int
1019 bdev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1020 {
1021 	const struct bdevsw *d;
1022 	int rv, mpflag;
1023 
1024 	if ((d = bdevsw_lookup(dev)) == NULL)
1025 		return ENXIO;
1026 
1027 	DEV_LOCK(d);
1028 	rv = (*d->d_ioctl)(dev, cmd, data, flag, l);
1029 	DEV_UNLOCK(d);
1030 
1031 	return rv;
1032 }
1033 
1034 int
1035 bdev_dump(dev_t dev, daddr_t addr, void *data, size_t sz)
1036 {
1037 	const struct bdevsw *d;
1038 	int rv;
1039 
1040 	/*
1041 	 * Dump can be called without the device open.  Since it can
1042 	 * currently only be called with the system paused (and in a
1043 	 * potentially unstable state), we don't perform any locking.
1044 	 */
1045 	if ((d = bdevsw_lookup(dev)) == NULL)
1046 		return ENXIO;
1047 
1048 	/* DEV_LOCK(d); */
1049 	rv = (*d->d_dump)(dev, addr, data, sz);
1050 	/* DEV_UNLOCK(d); */
1051 
1052 	return rv;
1053 }
1054 
1055 int
1056 bdev_flags(dev_t dev)
1057 {
1058 	const struct bdevsw *d;
1059 
1060 	if ((d = bdevsw_lookup(dev)) == NULL)
1061 		return 0;
1062 	return d->d_flag & ~D_TYPEMASK;
1063 }
1064 
1065 int
1066 bdev_type(dev_t dev)
1067 {
1068 	const struct bdevsw *d;
1069 
1070 	if ((d = bdevsw_lookup(dev)) == NULL)
1071 		return D_OTHER;
1072 	return d->d_flag & D_TYPEMASK;
1073 }
1074 
1075 int
1076 bdev_size(dev_t dev)
1077 {
1078 	const struct bdevsw *d;
1079 	int rv, mpflag = 0;
1080 
1081 	if ((d = bdevsw_lookup(dev)) == NULL ||
1082 	    d->d_psize == NULL)
1083 		return -1;
1084 
1085 	/*
1086 	 * Don't try to lock the device if we're dumping.
1087 	 * XXX: is there a better way to test this?
1088 	 */
1089 	if ((boothowto & RB_DUMP) == 0)
1090 		DEV_LOCK(d);
1091 	rv = (*d->d_psize)(dev);
1092 	if ((boothowto & RB_DUMP) == 0)
1093 		DEV_UNLOCK(d);
1094 
1095 	return rv;
1096 }
1097 
1098 int
1099 bdev_discard(dev_t dev, off_t pos, off_t len)
1100 {
1101 	const struct bdevsw *d;
1102 	int rv, mpflag;
1103 
1104 	if ((d = bdevsw_lookup(dev)) == NULL)
1105 		return ENXIO;
1106 
1107 	DEV_LOCK(d);
1108 	rv = (*d->d_discard)(dev, pos, len);
1109 	DEV_UNLOCK(d);
1110 
1111 	return rv;
1112 }
1113 
1114 void
1115 bdev_detached(dev_t dev)
1116 {
1117 	const struct bdevsw *d;
1118 	device_t dv;
1119 	int unit;
1120 
1121 	if ((d = bdevsw_lookup(dev)) == NULL)
1122 		return;
1123 	if (d->d_devtounit == NULL)
1124 		return;
1125 	if ((unit = (*d->d_devtounit)(dev)) == -1)
1126 		return;
1127 	if ((dv = device_lookup(d->d_cfdriver, unit)) == NULL)
1128 		return;
1129 	config_detach_commit(dv);
1130 }
1131 
1132 int
1133 cdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
1134 {
1135 	const struct cdevsw *d;
1136 	struct localcount *lc;
1137 	device_t dv = NULL/*XXXGCC*/;
1138 	int unit, rv, mpflag;
1139 
1140 	d = cdevsw_lookup_acquire(dev, &lc);
1141 	if (d == NULL)
1142 		return ENXIO;
1143 
1144 	if (d->d_devtounit) {
1145 		/*
1146 		 * If the device node corresponds to an autoconf device
1147 		 * instance, acquire a reference to it so that during
1148 		 * d_open, device_lookup is stable.
1149 		 *
1150 		 * XXX This should also arrange to instantiate cloning
1151 		 * pseudo-devices if appropriate, but that requires
1152 		 * reviewing them all to find and verify a common
1153 		 * pattern.
1154 		 */
1155 		if ((unit = (*d->d_devtounit)(dev)) == -1)
1156 			return ENXIO;
1157 		if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL)
1158 			return ENXIO;
1159 	}
1160 
1161 	DEV_LOCK(d);
1162 	rv = (*d->d_open)(dev, flag, devtype, l);
1163 	DEV_UNLOCK(d);
1164 
1165 	if (d->d_devtounit) {
1166 		device_release(dv);
1167 	}
1168 
1169 	cdevsw_release(d, lc);
1170 
1171 	return rv;
1172 }
1173 
1174 int
1175 cdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
1176 {
1177 	const struct cdevsw *d;
1178 	int rv, mpflag;
1179 
1180 	if ((d = cdevsw_lookup(dev)) == NULL)
1181 		return ENXIO;
1182 	if (d->d_cancel == NULL)
1183 		return ENODEV;
1184 
1185 	DEV_LOCK(d);
1186 	rv = (*d->d_cancel)(dev, flag, devtype, l);
1187 	DEV_UNLOCK(d);
1188 
1189 	return rv;
1190 }
1191 
1192 int
1193 cdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
1194 {
1195 	const struct cdevsw *d;
1196 	int rv, mpflag;
1197 
1198 	if ((d = cdevsw_lookup(dev)) == NULL)
1199 		return ENXIO;
1200 
1201 	DEV_LOCK(d);
1202 	rv = (*d->d_close)(dev, flag, devtype, l);
1203 	DEV_UNLOCK(d);
1204 
1205 	return rv;
1206 }
1207 
1208 int
1209 cdev_read(dev_t dev, struct uio *uio, int flag)
1210 {
1211 	const struct cdevsw *d;
1212 	int rv, mpflag;
1213 
1214 	if ((d = cdevsw_lookup(dev)) == NULL)
1215 		return ENXIO;
1216 
1217 	DEV_LOCK(d);
1218 	rv = (*d->d_read)(dev, uio, flag);
1219 	DEV_UNLOCK(d);
1220 
1221 	return rv;
1222 }
1223 
1224 int
1225 cdev_write(dev_t dev, struct uio *uio, int flag)
1226 {
1227 	const struct cdevsw *d;
1228 	int rv, mpflag;
1229 
1230 	if ((d = cdevsw_lookup(dev)) == NULL)
1231 		return ENXIO;
1232 
1233 	DEV_LOCK(d);
1234 	rv = (*d->d_write)(dev, uio, flag);
1235 	DEV_UNLOCK(d);
1236 
1237 	return rv;
1238 }
1239 
1240 int
1241 cdev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1242 {
1243 	const struct cdevsw *d;
1244 	int rv, mpflag;
1245 
1246 	if ((d = cdevsw_lookup(dev)) == NULL)
1247 		return ENXIO;
1248 
1249 	DEV_LOCK(d);
1250 	rv = (*d->d_ioctl)(dev, cmd, data, flag, l);
1251 	DEV_UNLOCK(d);
1252 
1253 	return rv;
1254 }
1255 
1256 void
1257 cdev_stop(struct tty *tp, int flag)
1258 {
1259 	const struct cdevsw *d;
1260 	int mpflag;
1261 
1262 	if ((d = cdevsw_lookup(tp->t_dev)) == NULL)
1263 		return;
1264 
1265 	DEV_LOCK(d);
1266 	(*d->d_stop)(tp, flag);
1267 	DEV_UNLOCK(d);
1268 }
1269 
1270 struct tty *
1271 cdev_tty(dev_t dev)
1272 {
1273 	const struct cdevsw *d;
1274 
1275 	if ((d = cdevsw_lookup(dev)) == NULL)
1276 		return NULL;
1277 
1278 	/* XXX Check if necessary. */
1279 	if (d->d_tty == NULL)
1280 		return NULL;
1281 
1282 	return (*d->d_tty)(dev);
1283 }
1284 
1285 int
1286 cdev_poll(dev_t dev, int flag, lwp_t *l)
1287 {
1288 	const struct cdevsw *d;
1289 	int rv, mpflag;
1290 
1291 	if ((d = cdevsw_lookup(dev)) == NULL)
1292 		return POLLERR;
1293 
1294 	DEV_LOCK(d);
1295 	rv = (*d->d_poll)(dev, flag, l);
1296 	DEV_UNLOCK(d);
1297 
1298 	return rv;
1299 }
1300 
1301 paddr_t
1302 cdev_mmap(dev_t dev, off_t off, int flag)
1303 {
1304 	const struct cdevsw *d;
1305 	paddr_t rv;
1306 	int mpflag;
1307 
1308 	if ((d = cdevsw_lookup(dev)) == NULL)
1309 		return (paddr_t)-1LL;
1310 
1311 	DEV_LOCK(d);
1312 	rv = (*d->d_mmap)(dev, off, flag);
1313 	DEV_UNLOCK(d);
1314 
1315 	return rv;
1316 }
1317 
1318 int
1319 cdev_kqfilter(dev_t dev, struct knote *kn)
1320 {
1321 	const struct cdevsw *d;
1322 	int rv, mpflag;
1323 
1324 	if ((d = cdevsw_lookup(dev)) == NULL)
1325 		return ENXIO;
1326 
1327 	DEV_LOCK(d);
1328 	rv = (*d->d_kqfilter)(dev, kn);
1329 	DEV_UNLOCK(d);
1330 
1331 	return rv;
1332 }
1333 
1334 int
1335 cdev_discard(dev_t dev, off_t pos, off_t len)
1336 {
1337 	const struct cdevsw *d;
1338 	int rv, mpflag;
1339 
1340 	if ((d = cdevsw_lookup(dev)) == NULL)
1341 		return ENXIO;
1342 
1343 	DEV_LOCK(d);
1344 	rv = (*d->d_discard)(dev, pos, len);
1345 	DEV_UNLOCK(d);
1346 
1347 	return rv;
1348 }
1349 
1350 int
1351 cdev_flags(dev_t dev)
1352 {
1353 	const struct cdevsw *d;
1354 
1355 	if ((d = cdevsw_lookup(dev)) == NULL)
1356 		return 0;
1357 	return d->d_flag & ~D_TYPEMASK;
1358 }
1359 
1360 int
1361 cdev_type(dev_t dev)
1362 {
1363 	const struct cdevsw *d;
1364 
1365 	if ((d = cdevsw_lookup(dev)) == NULL)
1366 		return D_OTHER;
1367 	return d->d_flag & D_TYPEMASK;
1368 }
1369 
1370 void
1371 cdev_detached(dev_t dev)
1372 {
1373 	const struct cdevsw *d;
1374 	device_t dv;
1375 	int unit;
1376 
1377 	if ((d = cdevsw_lookup(dev)) == NULL)
1378 		return;
1379 	if (d->d_devtounit == NULL)
1380 		return;
1381 	if ((unit = (*d->d_devtounit)(dev)) == -1)
1382 		return;
1383 	if ((dv = device_lookup(d->d_cfdriver, unit)) == NULL)
1384 		return;
1385 	config_detach_commit(dv);
1386 }
1387 
1388 /*
1389  * nommap(dev, off, prot)
1390  *
1391  *	mmap routine that always fails, for non-mmappable devices.
1392  */
1393 paddr_t
1394 nommap(dev_t dev, off_t off, int prot)
1395 {
1396 
1397 	return (paddr_t)-1;
1398 }
1399 
1400 /*
1401  * dev_minor_unit(dev)
1402  *
1403  *	Returns minor(dev) as an int.  Intended for use with struct
1404  *	bdevsw, cdevsw::d_devtounit for drivers whose /dev nodes are
1405  *	implemented by reference to an autoconf instance with the minor
1406  *	number.
1407  */
1408 int
1409 dev_minor_unit(dev_t dev)
1410 {
1411 
1412 	return minor(dev);
1413 }
1414
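
/*
 * Example (illustrative sketch only, not compiled): a hypothetical
 * driver whose /dev minor numbers map 1:1 onto autoconf unit numbers
 * wires up d_devtounit and d_cfdriver so that cdev_open() can pin the
 * autoconf instance across d_open and cdev_detached() can find it.
 * "example_cd" is the cfdriver that config(8) would generate.
 */
#if 0
extern struct cfdriver example_cd;

static const struct cdevsw example_cdevsw = {
	.d_open = example_open,		/* other methods omitted */
	.d_devtounit = dev_minor_unit,
	.d_cfdriver = &example_cd,
	.d_flag = D_OTHER | D_MPSAFE,
};
#endif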