xref: /netbsd-src/sys/kern/subr_devsw.c (revision 9937a29b04425f4a1e595bd5e81877ddf01440f2)
1 /*	$NetBSD: subr_devsw.c,v 1.53 2024/10/13 22:25:38 chs Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by MAEKAWA Masahide <gehenna@NetBSD.org>, and by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Overview
34  *
35  *	subr_devsw.c: registers device drivers by name and by major
36  *	number, and provides wrapper methods for performing I/O and
37  *	other tasks on device drivers, keying on the device number
38  *	(dev_t).
39  *
40  *	When the system is built, the config(8) command generates
41  *	static tables of device drivers built into the kernel image
42  *	along with their associated methods.  These are recorded in
43  *	the cdevsw0 and bdevsw0 tables.  Drivers can also be added to
44  *	and removed from the system dynamically.
45  *
46  * Allocation
47  *
48  *	When the system initially boots only the statically allocated
49  *	indexes (bdevsw0, cdevsw0) are used.  If these overflow due to
50  *	allocation, we allocate a fixed block of memory to hold the new,
51  *	expanded index.  This "fork" of the table is only ever performed
52  *	once in order to guarantee that other threads may safely access
53  *	the device tables:
54  *
55  *	o Once a thread has a "reference" to the table via an earlier
56  *	  open() call, we know that the entry in the table must exist
57  *	  and so it is safe to access it.
58  *
59  *	o Regardless of whether other threads see the old or new
60  *	  pointers, they will point to a correct device switch
61  *	  structure for the operation being performed.
62  *
63  *	XXX Currently, the wrapper methods such as cdev_read() verify
64  *	that a device driver does in fact exist before calling the
65  *	associated driver method.  This should be changed so that
66  *	once the device has been referenced by a vnode (opened),
67  *	calling the other methods should be valid until that reference
68  *	is dropped.
69  */
70 
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.53 2024/10/13 22:25:38 chs Exp $");
73 
74 #ifdef _KERNEL_OPT
75 #include "opt_dtrace.h"
76 #endif
77 
78 #include <sys/param.h>
79 #include <sys/conf.h>
80 #include <sys/kmem.h>
81 #include <sys/systm.h>
82 #include <sys/poll.h>
83 #include <sys/tty.h>
84 #include <sys/cpu.h>
85 #include <sys/buf.h>
86 #include <sys/reboot.h>
87 #include <sys/sdt.h>
88 #include <sys/atomic.h>
89 #include <sys/localcount.h>
90 #include <sys/pserialize.h>
91 #include <sys/xcall.h>
92 #include <sys/device.h>
93 
94 #ifdef DEVSW_DEBUG
95 #define	DPRINTF(x)	printf x
96 #else /* DEVSW_DEBUG */
97 #define	DPRINTF(x)
98 #endif /* DEVSW_DEBUG */
99 
100 #define	MAXDEVSW	512	/* the maximum of major device number */
101 #define	BDEVSW_SIZE	(sizeof(struct bdevsw *))
102 #define	CDEVSW_SIZE	(sizeof(struct cdevsw *))
103 #define	DEVSWCONV_SIZE	(sizeof(struct devsw_conv))
104 
105 struct devswref {
106 	struct localcount	*dr_lc;
107 };
108 
109 /* XXX bdevsw, cdevsw, max_bdevsws, and max_cdevsws should be volatile */
110 extern const struct bdevsw **bdevsw, *bdevsw0[];
111 extern const struct cdevsw **cdevsw, *cdevsw0[];
112 extern struct devsw_conv *devsw_conv, devsw_conv0[];
113 extern const int sys_bdevsws, sys_cdevsws;
114 extern int max_bdevsws, max_cdevsws, max_devsw_convs;
115 
116 static struct devswref *cdevswref;
117 static struct devswref *bdevswref;
118 static kcondvar_t devsw_cv;
119 
120 static int bdevsw_attach(const struct bdevsw *, devmajor_t *);
121 static int cdevsw_attach(const struct cdevsw *, devmajor_t *);
122 static void devsw_detach_locked(const struct bdevsw *, const struct cdevsw *);
123 
124 kmutex_t device_lock;
125 
126 void (*biodone_vfs)(buf_t *) = (void *)nullop;
127 
128 /*
129  * bdev probes
130  */
131 SDT_PROBE_DEFINE6(sdt, bdev, open, acquire,
132     "struct bdevsw *"/*bdevsw*/,
133     "dev_t"/*dev*/,
134     "int"/*flag*/,
135     "int"/*devtype*/,
136     "int"/*unit*/,
137     "device_t"/*dv*/);
138 SDT_PROBE_DEFINE4(sdt, bdev, open, entry,
139     "struct bdevsw *"/*bdevsw*/,
140     "dev_t"/*dev*/,
141     "int"/*flag*/,
142     "int"/*devtype*/);
143 SDT_PROBE_DEFINE5(sdt, bdev, open, return,
144     "struct bdevsw *"/*bdevsw*/,
145     "dev_t"/*dev*/,
146     "int"/*flag*/,
147     "int"/*devtype*/,
148     "int"/*error*/);
149 SDT_PROBE_DEFINE6(sdt, bdev, open, release,
150     "struct bdevsw *"/*bdevsw*/,
151     "dev_t"/*dev*/,
152     "int"/*flag*/,
153     "int"/*devtype*/,
154     "int"/*unit*/,
155     "device_t"/*dv*/);
156 
157 SDT_PROBE_DEFINE4(sdt, bdev, cancel, entry,
158     "struct bdevsw *"/*bdevsw*/,
159     "dev_t"/*dev*/,
160     "int"/*flag*/,
161     "int"/*devtype*/);
162 SDT_PROBE_DEFINE5(sdt, bdev, cancel, return,
163     "struct bdevsw *"/*bdevsw*/,
164     "dev_t"/*dev*/,
165     "int"/*flag*/,
166     "int"/*devtype*/,
167     "int"/*error*/);
168 
169 SDT_PROBE_DEFINE4(sdt, bdev, close, entry,
170     "struct bdevsw *"/*bdevsw*/,
171     "dev_t"/*dev*/,
172     "int"/*flag*/,
173     "int"/*devtype*/);
174 SDT_PROBE_DEFINE5(sdt, bdev, close, return,
175     "struct bdevsw *"/*bdevsw*/,
176     "dev_t"/*dev*/,
177     "int"/*flag*/,
178     "int"/*devtype*/,
179     "int"/*error*/);
180 
181 SDT_PROBE_DEFINE3(sdt, bdev, strategy, entry,
182     "struct bdevsw *"/*bdevsw*/,
183     "dev_t"/*dev*/,
184     "struct buf *"/*bp*/);
185 SDT_PROBE_DEFINE3(sdt, bdev, strategy, return,
186     "struct bdevsw *"/*bdevsw*/,
187     "dev_t"/*dev*/,
188     "struct buf *"/*bp*/);
189 
190 SDT_PROBE_DEFINE5(sdt, bdev, ioctl, entry,
191     "struct bdevsw *"/*bdevsw*/,
192     "dev_t"/*dev*/,
193     "unsigned long"/*cmd*/,
194     "void *"/*data*/,
195     "int"/*flag*/);
196 SDT_PROBE_DEFINE6(sdt, bdev, ioctl, return,
197     "struct bdevsw *"/*bdevsw*/,
198     "dev_t"/*dev*/,
199     "unsigned long"/*cmd*/,
200     "void *"/*data*/,
201     "int"/*flag*/,
202     "int"/*error*/);
203 
204 SDT_PROBE_DEFINE2(sdt, bdev, psize, entry,
205     "struct bdevsw *"/*bdevsw*/,
206     "dev_t"/*dev*/);
207 SDT_PROBE_DEFINE3(sdt, bdev, psize, return,
208     "struct bdevsw *"/*bdevsw*/,
209     "dev_t"/*dev*/,
210     "int"/*psize*/);
211 
212 SDT_PROBE_DEFINE4(sdt, bdev, discard, entry,
213     "struct bdevsw *"/*bdevsw*/,
214     "dev_t"/*dev*/,
215     "off_t"/*pos*/,
216     "off_t"/*len*/);
217 SDT_PROBE_DEFINE5(sdt, bdev, discard, return,
218     "struct bdevsw *"/*bdevsw*/,
219     "dev_t"/*dev*/,
220     "off_t"/*pos*/,
221     "off_t"/*len*/,
222     "int"/*error*/);
223 
224 /*
225  * cdev probes
226  */
227 SDT_PROBE_DEFINE6(sdt, cdev, open, acquire,
228     "struct cdevsw *"/*cdevsw*/,
229     "dev_t"/*dev*/,
230     "int"/*flag*/,
231     "int"/*devtype*/,
232     "int"/*unit*/,
233     "device_t"/*dv*/);
234 SDT_PROBE_DEFINE4(sdt, cdev, open, entry,
235     "struct cdevsw *"/*cdevsw*/,
236     "dev_t"/*dev*/,
237     "int"/*flag*/,
238     "int"/*devtype*/);
239 SDT_PROBE_DEFINE5(sdt, cdev, open, return,
240     "struct cdevsw *"/*cdevsw*/,
241     "dev_t"/*dev*/,
242     "int"/*flag*/,
243     "int"/*devtype*/,
244     "int"/*error*/);
245 SDT_PROBE_DEFINE6(sdt, cdev, open, release,
246     "struct cdevsw *"/*cdevsw*/,
247     "dev_t"/*dev*/,
248     "int"/*flag*/,
249     "int"/*devtype*/,
250     "int"/*unit*/,
251     "device_t"/*dv*/);
252 
253 SDT_PROBE_DEFINE4(sdt, cdev, cancel, entry,
254     "struct cdevsw *"/*cdevsw*/,
255     "dev_t"/*dev*/,
256     "int"/*flag*/,
257     "int"/*devtype*/);
258 SDT_PROBE_DEFINE5(sdt, cdev, cancel, return,
259     "struct cdevsw *"/*cdevsw*/,
260     "dev_t"/*dev*/,
261     "int"/*flag*/,
262     "int"/*devtype*/,
263     "int"/*error*/);
264 
265 SDT_PROBE_DEFINE4(sdt, cdev, close, entry,
266     "struct cdevsw *"/*cdevsw*/,
267     "dev_t"/*dev*/,
268     "int"/*flag*/,
269     "int"/*devtype*/);
270 SDT_PROBE_DEFINE5(sdt, cdev, close, return,
271     "struct cdevsw *"/*cdevsw*/,
272     "dev_t"/*dev*/,
273     "int"/*flag*/,
274     "int"/*devtype*/,
275     "int"/*error*/);
276 
277 SDT_PROBE_DEFINE4(sdt, cdev, read, entry,
278     "struct cdevsw *"/*cdevsw*/,
279     "dev_t"/*dev*/,
280     "struct uio *"/*uio*/,
281     "int"/*flag*/);
282 SDT_PROBE_DEFINE5(sdt, cdev, read, return,
283     "struct cdevsw *"/*cdevsw*/,
284     "dev_t"/*dev*/,
285     "struct uio *"/*uio*/,
286     "int"/*flag*/,
287     "int"/*error*/);
288 
289 SDT_PROBE_DEFINE4(sdt, cdev, write, entry,
290     "struct cdevsw *"/*cdevsw*/,
291     "dev_t"/*dev*/,
292     "struct uio *"/*uio*/,
293     "int"/*flag*/);
294 SDT_PROBE_DEFINE5(sdt, cdev, write, return,
295     "struct cdevsw *"/*cdevsw*/,
296     "dev_t"/*dev*/,
297     "struct uio *"/*uio*/,
298     "int"/*flag*/,
299     "int"/*error*/);
300 
301 SDT_PROBE_DEFINE5(sdt, cdev, ioctl, entry,
302     "struct cdevsw *"/*cdevsw*/,
303     "dev_t"/*dev*/,
304     "unsigned long"/*cmd*/,
305     "void *"/*data*/,
306     "int"/*flag*/);
307 SDT_PROBE_DEFINE6(sdt, cdev, ioctl, return,
308     "struct cdevsw *"/*cdevsw*/,
309     "dev_t"/*dev*/,
310     "unsigned long"/*cmd*/,
311     "void *"/*data*/,
312     "int"/*flag*/,
313     "int"/*error*/);
314 
315 SDT_PROBE_DEFINE4(sdt, cdev, stop, entry,
316     "struct cdevsw *"/*cdevsw*/,
317     "dev_t"/*dev*/,
318     "struct tty *"/*tp*/,
319     "int"/*flag*/);
320 SDT_PROBE_DEFINE4(sdt, cdev, stop, return,
321     "struct cdevsw *"/*cdevsw*/,
322     "dev_t"/*dev*/,
323     "struct tty *"/*tp*/,
324     "int"/*flag*/);
325 
326 SDT_PROBE_DEFINE3(sdt, cdev, poll, entry,
327     "struct cdevsw *"/*cdevsw*/,
328     "dev_t"/*dev*/,
329     "int"/*events*/);
330 SDT_PROBE_DEFINE4(sdt, cdev, poll, return,
331     "struct cdevsw *"/*cdevsw*/,
332     "dev_t"/*dev*/,
333     "int"/*events*/,
334     "int"/*revents*/);
335 
336 SDT_PROBE_DEFINE4(sdt, cdev, mmap, entry,
337     "struct cdevsw *"/*cdevsw*/,
338     "dev_t"/*dev*/,
339     "off_t"/*off*/,
340     "int"/*flag*/);
341 SDT_PROBE_DEFINE5(sdt, cdev, mmap, return,
342     "struct cdevsw *"/*cdevsw*/,
343     "dev_t"/*dev*/,
344     "off_t"/*off*/,
345     "int"/*flag*/,
346     "paddr_t"/*mmapcookie*/);
347 
348 SDT_PROBE_DEFINE3(sdt, cdev, kqfilter, entry,
349     "struct cdevsw *"/*cdevsw*/,
350     "dev_t"/*dev*/,
351     "struct knote *"/*kn*/);
352 SDT_PROBE_DEFINE4(sdt, cdev, kqfilter, return,
353     "struct cdevsw *"/*cdevsw*/,
354     "dev_t"/*dev*/,
355     "struct knote *"/*kn*/,
356     "int"/*error*/);
357 
358 SDT_PROBE_DEFINE4(sdt, cdev, discard, entry,
359     "struct cdevsw *"/*cdevsw*/,
360     "dev_t"/*dev*/,
361     "off_t"/*pos*/,
362     "off_t"/*len*/);
363 SDT_PROBE_DEFINE5(sdt, cdev, discard, return,
364     "struct cdevsw *"/*cdevsw*/,
365     "dev_t"/*dev*/,
366     "off_t"/*pos*/,
367     "off_t"/*len*/,
368     "int"/*error*/);
369 
370 void
371 devsw_init(void)
372 {
373 
374 	KASSERT(sys_bdevsws < MAXDEVSW - 1);
375 	KASSERT(sys_cdevsws < MAXDEVSW - 1);
376 	mutex_init(&device_lock, MUTEX_DEFAULT, IPL_NONE);
377 
378 	cv_init(&devsw_cv, "devsw");
379 }
380 
381 int
382 devsw_attach(const char *devname,
383 	     const struct bdevsw *bdev, devmajor_t *bmajor,
384 	     const struct cdevsw *cdev, devmajor_t *cmajor)
385 {
386 	struct devsw_conv *conv;
387 	char *name;
388 	int error, i;
389 
390 	if (devname == NULL || cdev == NULL)
391 		return EINVAL;
392 
393 	mutex_enter(&device_lock);
394 
395 	for (i = 0; i < max_devsw_convs; i++) {
396 		conv = &devsw_conv[i];
397 		if (conv->d_name == NULL || strcmp(devname, conv->d_name) != 0)
398 			continue;
399 
400 		if ((bdev != NULL) && (*bmajor < 0))
401 			*bmajor = conv->d_bmajor;
402 		if (*cmajor < 0)
403 			*cmajor = conv->d_cmajor;
404 
405 		if (*bmajor != conv->d_bmajor || *cmajor != conv->d_cmajor) {
406 			error = EINVAL;
407 			goto out;
408 		}
409 		if ((*bmajor >= 0 && bdev == NULL) || *cmajor < 0) {
410 			error = EINVAL;
411 			goto out;
412 		}
413 
414 		if ((*bmajor >= 0 && bdevsw[*bmajor] != NULL) ||
415 		    cdevsw[*cmajor] != NULL) {
416 			error = EEXIST;
417 			goto out;
418 		}
419 		break;
420 	}
421 
422 	/*
423 	 * XXX This should allocate what it needs up front so we never
424 	 * need to flail around trying to unwind.
425 	 */
426 	error = bdevsw_attach(bdev, bmajor);
427 	if (error != 0)
428 		goto out;
429 	error = cdevsw_attach(cdev, cmajor);
430 	if (error != 0) {
431 		devsw_detach_locked(bdev, NULL);
432 		goto out;
433 	}
434 
435 	/*
436 	 * If we already found a conv, we're done.  Otherwise, find an
437 	 * empty slot or extend the table.
438 	 */
439 	if (i < max_devsw_convs) {
440 		error = 0;
441 		goto out;
442 	}
443 
444 	for (i = 0; i < max_devsw_convs; i++) {
445 		if (devsw_conv[i].d_name == NULL)
446 			break;
447 	}
448 	if (i == max_devsw_convs) {
449 		struct devsw_conv *newptr;
450 		int old_convs, new_convs;
451 
452 		old_convs = max_devsw_convs;
453 		new_convs = old_convs + 1;
454 
455 		newptr = kmem_zalloc(new_convs * DEVSWCONV_SIZE, KM_NOSLEEP);
456 		if (newptr == NULL) {
457 			devsw_detach_locked(bdev, cdev);
458 			error = ENOMEM;
459 			goto out;
460 		}
461 		newptr[old_convs].d_name = NULL;
462 		newptr[old_convs].d_bmajor = -1;
463 		newptr[old_convs].d_cmajor = -1;
464 		memcpy(newptr, devsw_conv, old_convs * DEVSWCONV_SIZE);
465 		if (devsw_conv != devsw_conv0)
466 			kmem_free(devsw_conv, old_convs * DEVSWCONV_SIZE);
467 		devsw_conv = newptr;
468 		max_devsw_convs = new_convs;
469 	}
470 
471 	name = kmem_strdupsize(devname, NULL, KM_NOSLEEP);
472 	if (name == NULL) {
473 		devsw_detach_locked(bdev, cdev);
474 		error = ENOMEM;
475 		goto out;
476 	}
477 
478 	devsw_conv[i].d_name = name;
479 	devsw_conv[i].d_bmajor = *bmajor;
480 	devsw_conv[i].d_cmajor = *cmajor;
481 	error = 0;
482 out:
483 	mutex_exit(&device_lock);
484 	return error;
485 }
486 
487 static int
488 bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor)
489 {
490 	const struct bdevsw **newbdevsw = NULL;
491 	struct devswref *newbdevswref = NULL;
492 	struct localcount *lc;
493 	devmajor_t bmajor;
494 	int i;
495 
496 	KASSERT(mutex_owned(&device_lock));
497 
498 	if (devsw == NULL)
499 		return 0;
500 
501 	if (*devmajor < 0) {
502 		for (bmajor = sys_bdevsws; bmajor < max_bdevsws; bmajor++) {
503 			if (bdevsw[bmajor] != NULL)
504 				continue;
505 			for (i = 0; i < max_devsw_convs; i++) {
506 				if (devsw_conv[i].d_bmajor == bmajor)
507 					break;
508 			}
509 			if (i != max_devsw_convs)
510 				continue;
511 			break;
512 		}
513 		*devmajor = bmajor;
514 	}
515 
516 	if (*devmajor >= MAXDEVSW) {
517 		printf("%s: block majors exhausted\n", __func__);
518 		return ENOMEM;
519 	}
520 
521 	if (bdevswref == NULL) {
522 		newbdevswref = kmem_zalloc(MAXDEVSW * sizeof(newbdevswref[0]),
523 		    KM_NOSLEEP);
524 		if (newbdevswref == NULL)
525 			return ENOMEM;
526 		atomic_store_release(&bdevswref, newbdevswref);
527 	}
528 
529 	if (*devmajor >= max_bdevsws) {
530 		KASSERT(bdevsw == bdevsw0);
531 		newbdevsw = kmem_zalloc(MAXDEVSW * sizeof(newbdevsw[0]),
532 		    KM_NOSLEEP);
533 		if (newbdevsw == NULL)
534 			return ENOMEM;
535 		memcpy(newbdevsw, bdevsw, max_bdevsws * sizeof(bdevsw[0]));
536 		atomic_store_release(&bdevsw, newbdevsw);
537 		atomic_store_release(&max_bdevsws, MAXDEVSW);
538 	}
539 
540 	if (bdevsw[*devmajor] != NULL)
541 		return EEXIST;
542 
543 	KASSERT(bdevswref[*devmajor].dr_lc == NULL);
544 	lc = kmem_zalloc(sizeof(*lc), KM_SLEEP);
545 	localcount_init(lc);
546 	bdevswref[*devmajor].dr_lc = lc;
547 
548 	atomic_store_release(&bdevsw[*devmajor], devsw);
549 
550 	return 0;
551 }
552 
553 static int
554 cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor)
555 {
556 	const struct cdevsw **newcdevsw = NULL;
557 	struct devswref *newcdevswref = NULL;
558 	struct localcount *lc;
559 	devmajor_t cmajor;
560 	int i;
561 
562 	KASSERT(mutex_owned(&device_lock));
563 
564 	if (*devmajor < 0) {
565 		for (cmajor = sys_cdevsws; cmajor < max_cdevsws; cmajor++) {
566 			if (cdevsw[cmajor] != NULL)
567 				continue;
568 			for (i = 0; i < max_devsw_convs; i++) {
569 				if (devsw_conv[i].d_cmajor == cmajor)
570 					break;
571 			}
572 			if (i != max_devsw_convs)
573 				continue;
574 			break;
575 		}
576 		*devmajor = cmajor;
577 	}
578 
579 	if (*devmajor >= MAXDEVSW) {
580 		printf("%s: character majors exhausted\n", __func__);
581 		return ENOMEM;
582 	}
583 
584 	if (cdevswref == NULL) {
585 		newcdevswref = kmem_zalloc(MAXDEVSW * sizeof(newcdevswref[0]),
586 		    KM_NOSLEEP);
587 		if (newcdevswref == NULL)
588 			return ENOMEM;
589 		atomic_store_release(&cdevswref, newcdevswref);
590 	}
591 
592 	if (*devmajor >= max_cdevsws) {
593 		KASSERT(cdevsw == cdevsw0);
594 		newcdevsw = kmem_zalloc(MAXDEVSW * sizeof(newcdevsw[0]),
595 		    KM_NOSLEEP);
596 		if (newcdevsw == NULL)
597 			return ENOMEM;
598 		memcpy(newcdevsw, cdevsw, max_cdevsws * sizeof(cdevsw[0]));
599 		atomic_store_release(&cdevsw, newcdevsw);
600 		atomic_store_release(&max_cdevsws, MAXDEVSW);
601 	}
602 
603 	if (cdevsw[*devmajor] != NULL)
604 		return EEXIST;
605 
606 	KASSERT(cdevswref[*devmajor].dr_lc == NULL);
607 	lc = kmem_zalloc(sizeof(*lc), KM_SLEEP);
608 	localcount_init(lc);
609 	cdevswref[*devmajor].dr_lc = lc;
610 
611 	atomic_store_release(&cdevsw[*devmajor], devsw);
612 
613 	return 0;
614 }
615 
616 static void
617 devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev)
618 {
619 	int bi = -1, ci = -1/*XXXGCC*/, di;
620 	struct cfdriver *cd;
621 	device_t dv;
622 
623 	KASSERT(mutex_owned(&device_lock));
624 
625 	/*
626 	 * If this is wired to an autoconf device, make sure the device
627 	 * has no more instances.  No locking here because under
628 	 * correct use of devsw_detach, none of this state can change
629 	 * at this point.
630 	 */
631 	if (cdev != NULL && (cd = cdev->d_cfdriver) != NULL) {
632 		for (di = 0; di < cd->cd_ndevs; di++) {
633 			KASSERTMSG((dv = cd->cd_devs[di]) == NULL,
634 			    "detaching character device driver %s"
635 			    " still has attached unit %s",
636 			    cd->cd_name, device_xname(dv));
637 		}
638 	}
639 	if (bdev != NULL && (cd = bdev->d_cfdriver) != NULL) {
640 		for (di = 0; di < cd->cd_ndevs; di++) {
641 			KASSERTMSG((dv = cd->cd_devs[di]) == NULL,
642 			    "detaching block device driver %s"
643 			    " still has attached unit %s",
644 			    cd->cd_name, device_xname(dv));
645 		}
646 	}
647 
648 	/* Prevent new references.  */
649 	if (bdev != NULL) {
650 		for (bi = 0; bi < max_bdevsws; bi++) {
651 			if (bdevsw[bi] != bdev)
652 				continue;
653 			atomic_store_relaxed(&bdevsw[bi], NULL);
654 			break;
655 		}
656 		KASSERT(bi < max_bdevsws);
657 	}
658 	if (cdev != NULL) {
659 		for (ci = 0; ci < max_cdevsws; ci++) {
660 			if (cdevsw[ci] != cdev)
661 				continue;
662 			atomic_store_relaxed(&cdevsw[ci], NULL);
663 			break;
664 		}
665 		KASSERT(ci < max_cdevsws);
666 	}
667 
668 	if (bdev == NULL && cdev == NULL) /* XXX possible? */
669 		return;
670 
671 	/*
672 	 * Wait for all bdevsw_lookup_acquire, cdevsw_lookup_acquire
673 	 * calls to notice that the devsw is gone.
674 	 *
675 	 * XXX Despite the use of the pserialize_read_enter/exit API
676 	 * elsewhere in this file, we use xc_barrier here instead of
677 	 * pserialize_perform -- because devsw_init is too early for
678 	 * pserialize_create.  Either pserialize_create should be made
679 	 * to work earlier, or it should be nixed altogether.  Until
680 	 * that is fixed, xc_barrier will serve the same purpose.
681 	 */
682 	xc_barrier(0);
683 
684 	/*
685 	 * Wait for all references to drain.  It is the caller's
686 	 * responsibility to ensure that at this point, there are no
687 	 * extant open instances and all new d_open calls will fail.
688 	 *
689 	 * Note that localcount_drain may release and reacquire
690 	 * device_lock.
691 	 */
692 	if (bdev != NULL) {
693 		localcount_drain(bdevswref[bi].dr_lc,
694 		    &devsw_cv, &device_lock);
695 		localcount_fini(bdevswref[bi].dr_lc);
696 		kmem_free(bdevswref[bi].dr_lc, sizeof(*bdevswref[bi].dr_lc));
697 		bdevswref[bi].dr_lc = NULL;
698 	}
699 	if (cdev != NULL) {
700 		localcount_drain(cdevswref[ci].dr_lc,
701 		    &devsw_cv, &device_lock);
702 		localcount_fini(cdevswref[ci].dr_lc);
703 		kmem_free(cdevswref[ci].dr_lc, sizeof(*cdevswref[ci].dr_lc));
704 		cdevswref[ci].dr_lc = NULL;
705 	}
706 }
707 
708 void
709 devsw_detach(const struct bdevsw *bdev, const struct cdevsw *cdev)
710 {
711 
712 	mutex_enter(&device_lock);
713 	devsw_detach_locked(bdev, cdev);
714 	mutex_exit(&device_lock);
715 }
716 
717 /*
718  * Look up a block device by number.
719  *
720  * => Caller must ensure that the device is attached.
721  */
722 const struct bdevsw *
723 bdevsw_lookup(dev_t dev)
724 {
725 	devmajor_t bmajor;
726 
727 	if (dev == NODEV)
728 		return NULL;
729 	bmajor = major(dev);
730 	if (bmajor < 0 || bmajor >= atomic_load_relaxed(&max_bdevsws))
731 		return NULL;
732 
733 	return atomic_load_consume(&bdevsw)[bmajor];
734 }
735 
736 static const struct bdevsw *
737 bdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
738 {
739 	devmajor_t bmajor;
740 	const struct bdevsw *bdev = NULL, *const *curbdevsw;
741 	struct devswref *curbdevswref;
742 	int s;
743 
744 	if (dev == NODEV)
745 		return NULL;
746 	bmajor = major(dev);
747 	if (bmajor < 0)
748 		return NULL;
749 
750 	s = pserialize_read_enter();
751 
752 	/*
753 	 * max_bdevsws never goes down, so it is safe to rely on this
754 	 * condition without any locking for the array access below.
755 	 * Test sys_bdevsws first so we can avoid the memory barrier in
756 	 * that case.
757 	 */
758 	if (bmajor >= sys_bdevsws &&
759 	    bmajor >= atomic_load_acquire(&max_bdevsws))
760 		goto out;
761 	curbdevsw = atomic_load_consume(&bdevsw);
762 	if ((bdev = atomic_load_consume(&curbdevsw[bmajor])) == NULL)
763 		goto out;
764 
765 	curbdevswref = atomic_load_consume(&bdevswref);
766 	if (curbdevswref == NULL) {
767 		*lcp = NULL;
768 	} else if ((*lcp = curbdevswref[bmajor].dr_lc) != NULL) {
769 		localcount_acquire(*lcp);
770 	}
771 out:
772 	pserialize_read_exit(s);
773 	return bdev;
774 }
775 
776 static void
777 bdevsw_release(const struct bdevsw *bdev, struct localcount *lc)
778 {
779 
780 	if (lc == NULL)
781 		return;
782 	localcount_release(lc, &devsw_cv, &device_lock);
783 }
784 
785 /*
786  * Look up a character device by number.
787  *
788  * => Caller must ensure that the device is attached.
789  */
790 const struct cdevsw *
791 cdevsw_lookup(dev_t dev)
792 {
793 	devmajor_t cmajor;
794 
795 	if (dev == NODEV)
796 		return NULL;
797 	cmajor = major(dev);
798 	if (cmajor < 0 || cmajor >= atomic_load_relaxed(&max_cdevsws))
799 		return NULL;
800 
801 	return atomic_load_consume(&cdevsw)[cmajor];
802 }
803 
804 static const struct cdevsw *
805 cdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
806 {
807 	devmajor_t cmajor;
808 	const struct cdevsw *cdev = NULL, *const *curcdevsw;
809 	struct devswref *curcdevswref;
810 	int s;
811 
812 	if (dev == NODEV)
813 		return NULL;
814 	cmajor = major(dev);
815 	if (cmajor < 0)
816 		return NULL;
817 
818 	s = pserialize_read_enter();
819 
820 	/*
821 	 * max_cdevsws never goes down, so it is safe to rely on this
822 	 * condition without any locking for the array access below.
823 	 * Test sys_cdevsws first so we can avoid the memory barrier in
824 	 * that case.
825 	 */
826 	if (cmajor >= sys_cdevsws &&
827 	    cmajor >= atomic_load_acquire(&max_cdevsws))
828 		goto out;
829 	curcdevsw = atomic_load_consume(&cdevsw);
830 	if ((cdev = atomic_load_consume(&curcdevsw[cmajor])) == NULL)
831 		goto out;
832 
833 	curcdevswref = atomic_load_consume(&cdevswref);
834 	if (curcdevswref == NULL) {
835 		*lcp = NULL;
836 	} else if ((*lcp = curcdevswref[cmajor].dr_lc) != NULL) {
837 		localcount_acquire(*lcp);
838 	}
839 out:
840 	pserialize_read_exit(s);
841 	return cdev;
842 }
843 
844 static void
845 cdevsw_release(const struct cdevsw *cdev, struct localcount *lc)
846 {
847 
848 	if (lc == NULL)
849 		return;
850 	localcount_release(lc, &devsw_cv, &device_lock);
851 }
852 
853 /*
854  * Look up a block device by reference to its operations set.
855  *
856  * => Caller must ensure that the device is not detached, and therefore
857  *    that the returned major is still valid when dereferenced.
858  */
859 devmajor_t
860 bdevsw_lookup_major(const struct bdevsw *bdev)
861 {
862 	const struct bdevsw *const *curbdevsw;
863 	devmajor_t bmajor, bmax;
864 
865 	bmax = atomic_load_acquire(&max_bdevsws);
866 	curbdevsw = atomic_load_consume(&bdevsw);
867 	for (bmajor = 0; bmajor < bmax; bmajor++) {
868 		if (atomic_load_relaxed(&curbdevsw[bmajor]) == bdev)
869 			return bmajor;
870 	}
871 
872 	return NODEVMAJOR;
873 }
874 
875 /*
876  * Look up a character device by reference to its operations set.
877  *
878  * => Caller must ensure that the device is not detached, and therefore
879  *    that the returned major is still valid when dereferenced.
880  */
881 devmajor_t
882 cdevsw_lookup_major(const struct cdevsw *cdev)
883 {
884 	const struct cdevsw *const *curcdevsw;
885 	devmajor_t cmajor, cmax;
886 
887 	cmax = atomic_load_acquire(&max_cdevsws);
888 	curcdevsw = atomic_load_consume(&cdevsw);
889 	for (cmajor = 0; cmajor < cmax; cmajor++) {
890 		if (atomic_load_relaxed(&curcdevsw[cmajor]) == cdev)
891 			return cmajor;
892 	}
893 
894 	return NODEVMAJOR;
895 }
896 
897 /*
898  * Convert from block major number to name.
899  *
900  * => Caller must ensure that the device is not detached, and therefore
901  *    that the name pointer is still valid when dereferenced.
902  */
903 const char *
904 devsw_blk2name(devmajor_t bmajor)
905 {
906 	const char *name;
907 	devmajor_t cmajor;
908 	int i;
909 
910 	name = NULL;
911 	cmajor = -1;
912 
913 	mutex_enter(&device_lock);
914 	if (bmajor < 0 || bmajor >= max_bdevsws || bdevsw[bmajor] == NULL) {
915 		mutex_exit(&device_lock);
916 		return NULL;
917 	}
918 	for (i = 0; i < max_devsw_convs; i++) {
919 		if (devsw_conv[i].d_bmajor == bmajor) {
920 			cmajor = devsw_conv[i].d_cmajor;
921 			break;
922 		}
923 	}
924 	if (cmajor >= 0 && cmajor < max_cdevsws && cdevsw[cmajor] != NULL)
925 		name = devsw_conv[i].d_name;
926 	mutex_exit(&device_lock);
927 
928 	return name;
929 }
930 
931 /*
932  * Convert char major number to device driver name.
933  */
934 const char *
935 cdevsw_getname(devmajor_t major)
936 {
937 	const char *name;
938 	int i;
939 
940 	name = NULL;
941 
942 	if (major < 0)
943 		return NULL;
944 
945 	mutex_enter(&device_lock);
946 	for (i = 0; i < max_devsw_convs; i++) {
947 		if (devsw_conv[i].d_cmajor == major) {
948 			name = devsw_conv[i].d_name;
949 			break;
950 		}
951 	}
952 	mutex_exit(&device_lock);
953 	return name;
954 }
955 
956 /*
957  * Convert block major number to device driver name.
958  */
959 const char *
960 bdevsw_getname(devmajor_t major)
961 {
962 	const char *name;
963 	int i;
964 
965 	name = NULL;
966 
967 	if (major < 0)
968 		return NULL;
969 
970 	mutex_enter(&device_lock);
971 	for (i = 0; i < max_devsw_convs; i++) {
972 		if (devsw_conv[i].d_bmajor == major) {
973 			name = devsw_conv[i].d_name;
974 			break;
975 		}
976 	}
977 	mutex_exit(&device_lock);
978 	return name;
979 }
980 
981 /*
982  * Convert from device name to block major number.
983  *
984  * => Caller must ensure that the device is not detached, and therefore
985  *    that the major number is still valid when dereferenced.
986  */
987 devmajor_t
988 devsw_name2blk(const char *name, char *devname, size_t devnamelen)
989 {
990 	struct devsw_conv *conv;
991 	devmajor_t bmajor;
992 	int i;
993 
994 	if (name == NULL)
995 		return NODEVMAJOR;
996 
997 	mutex_enter(&device_lock);
998 	for (i = 0; i < max_devsw_convs; i++) {
999 		size_t len;
1000 
1001 		conv = &devsw_conv[i];
1002 		if (conv->d_name == NULL)
1003 			continue;
1004 		len = strlen(conv->d_name);
1005 		if (strncmp(conv->d_name, name, len) != 0)
1006 			continue;
1007 		if (name[len] != '\0' && !isdigit((unsigned char)name[len]))
1008 			continue;
1009 		bmajor = conv->d_bmajor;
1010 		if (bmajor < 0 || bmajor >= max_bdevsws ||
1011 		    bdevsw[bmajor] == NULL)
1012 			break;
1013 		if (devname != NULL) {
1014 #ifdef DEVSW_DEBUG
1015 			if (strlen(conv->d_name) >= devnamelen)
1016 				printf("%s: too short buffer\n", __func__);
1017 #endif /* DEVSW_DEBUG */
1018 			strncpy(devname, conv->d_name, devnamelen);
1019 			devname[devnamelen - 1] = '\0';
1020 		}
1021 		mutex_exit(&device_lock);
1022 		return bmajor;
1023 	}
1024 
1025 	mutex_exit(&device_lock);
1026 	return NODEVMAJOR;
1027 }
1028 
1029 /*
1030  * Convert from device name to char major number.
1031  *
1032  * => Caller must ensure that the device is not detached, and therefore
1033  *    that the major number is still valid when dereferenced.
1034  */
1035 devmajor_t
1036 devsw_name2chr(const char *name, char *devname, size_t devnamelen)
1037 {
1038 	struct devsw_conv *conv;
1039 	devmajor_t cmajor;
1040 	int i;
1041 
1042 	if (name == NULL)
1043 		return NODEVMAJOR;
1044 
1045 	mutex_enter(&device_lock);
1046 	for (i = 0; i < max_devsw_convs; i++) {
1047 		size_t len;
1048 
1049 		conv = &devsw_conv[i];
1050 		if (conv->d_name == NULL)
1051 			continue;
1052 		len = strlen(conv->d_name);
1053 		if (strncmp(conv->d_name, name, len) != 0)
1054 			continue;
1055 		if (name[len] != '\0' && !isdigit((unsigned char)name[len]))
1056 			continue;
1057 		cmajor = conv->d_cmajor;
1058 		if (cmajor < 0 || cmajor >= max_cdevsws ||
1059 		    cdevsw[cmajor] == NULL)
1060 			break;
1061 		if (devname != NULL) {
1062 #ifdef DEVSW_DEBUG
1063 			if (strlen(conv->d_name) >= devnamelen)
1064 				printf("%s: too short buffer", __func__);
1065 #endif /* DEVSW_DEBUG */
1066 			strncpy(devname, conv->d_name, devnamelen);
1067 			devname[devnamelen - 1] = '\0';
1068 		}
1069 		mutex_exit(&device_lock);
1070 		return cmajor;
1071 	}
1072 
1073 	mutex_exit(&device_lock);
1074 	return NODEVMAJOR;
1075 }
1076 
1077 /*
1078  * Convert from character dev_t to block dev_t.
1079  *
1080  * => Caller must ensure that the device is not detached, and therefore
1081  *    that the major number is still valid when dereferenced.
1082  */
1083 dev_t
1084 devsw_chr2blk(dev_t cdev)
1085 {
1086 	devmajor_t bmajor, cmajor;
1087 	int i;
1088 	dev_t rv;
1089 
1090 	cmajor = major(cdev);
1091 	bmajor = NODEVMAJOR;
1092 	rv = NODEV;
1093 
1094 	mutex_enter(&device_lock);
1095 	if (cmajor < 0 || cmajor >= max_cdevsws || cdevsw[cmajor] == NULL) {
1096 		mutex_exit(&device_lock);
1097 		return NODEV;
1098 	}
1099 	for (i = 0; i < max_devsw_convs; i++) {
1100 		if (devsw_conv[i].d_cmajor == cmajor) {
1101 			bmajor = devsw_conv[i].d_bmajor;
1102 			break;
1103 		}
1104 	}
1105 	if (bmajor >= 0 && bmajor < max_bdevsws && bdevsw[bmajor] != NULL)
1106 		rv = makedev(bmajor, minor(cdev));
1107 	mutex_exit(&device_lock);
1108 
1109 	return rv;
1110 }
1111 
1112 /*
1113  * Convert from block dev_t to character dev_t.
1114  *
1115  * => Caller must ensure that the device is not detached, and therefore
1116  *    that the major number is still valid when dereferenced.
1117  */
1118 dev_t
1119 devsw_blk2chr(dev_t bdev)
1120 {
1121 	devmajor_t bmajor, cmajor;
1122 	int i;
1123 	dev_t rv;
1124 
1125 	bmajor = major(bdev);
1126 	cmajor = NODEVMAJOR;
1127 	rv = NODEV;
1128 
1129 	mutex_enter(&device_lock);
1130 	if (bmajor < 0 || bmajor >= max_bdevsws || bdevsw[bmajor] == NULL) {
1131 		mutex_exit(&device_lock);
1132 		return NODEV;
1133 	}
1134 	for (i = 0; i < max_devsw_convs; i++) {
1135 		if (devsw_conv[i].d_bmajor == bmajor) {
1136 			cmajor = devsw_conv[i].d_cmajor;
1137 			break;
1138 		}
1139 	}
1140 	if (cmajor >= 0 && cmajor < max_cdevsws && cdevsw[cmajor] != NULL)
1141 		rv = makedev(cmajor, minor(bdev));
1142 	mutex_exit(&device_lock);
1143 
1144 	return rv;
1145 }
1146 
1147 /*
1148  * Device access methods.
1149  */
1150 
/*
 * DEV_LOCK(d), DEV_UNLOCK(d)
 *
 *	Bracket a call through a device switch entry.  Both macros use
 *	a local int named `mpflag' that the calling function must
 *	declare.
 *
 *	DEV_LOCK stores (d)->d_flag & D_MPSAFE in mpflag and, when the
 *	result is zero, calls KERNEL_LOCK(1, NULL).  DEV_UNLOCK calls
 *	KERNEL_UNLOCK_ONE(NULL) when mpflag is zero; it does not
 *	evaluate its argument.
 *
 *	Each macro expands to exactly one statement, so it can be used
 *	as the unbraced body of an if/else.
 */
#define	DEV_LOCK(d)						\
	do {							\
		if ((mpflag = ((d)->d_flag & D_MPSAFE)) == 0) {	\
			KERNEL_LOCK(1, NULL);			\
		}						\
	} while (/*CONSTCOND*/ 0)

#define	DEV_UNLOCK(d)						\
	do {							\
		if (mpflag == 0) {				\
			KERNEL_UNLOCK_ONE(NULL);		\
		}						\
	} while (/*CONSTCOND*/ 0)
1160 
1161 int
1162 bdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
1163 {
1164 	const struct bdevsw *d;
1165 	struct localcount *lc;
1166 	device_t dv = NULL/*XXXGCC*/;
1167 	int unit = -1/*XXXGCC*/, rv, mpflag;
1168 
1169 	d = bdevsw_lookup_acquire(dev, &lc);
1170 	if (d == NULL)
1171 		return ENXIO;
1172 
1173 	if (d->d_devtounit) {
1174 		/*
1175 		 * If the device node corresponds to an autoconf device
1176 		 * instance, acquire a reference to it so that during
1177 		 * d_open, device_lookup is stable.
1178 		 *
1179 		 * XXX This should also arrange to instantiate cloning
1180 		 * pseudo-devices if appropriate, but that requires
1181 		 * reviewing them all to find and verify a common
1182 		 * pattern.
1183 		 */
1184 		if ((unit = (*d->d_devtounit)(dev)) == -1) {
1185 			rv = ENXIO;
1186 			goto out;
1187 		}
1188 		if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) ==
1189 		    NULL) {
1190 			rv = ENXIO;
1191 			goto out;
1192 		}
1193 		SDT_PROBE6(sdt, bdev, open, acquire,
1194 		    d, dev, flag, devtype, unit, dv);
1195 	}
1196 
1197 	DEV_LOCK(d);
1198 	SDT_PROBE4(sdt, bdev, open, entry,  d, dev, flag, devtype);
1199 	rv = (*d->d_open)(dev, flag, devtype, l);
1200 	SDT_PROBE5(sdt, bdev, open, return,  d, dev, flag, devtype, rv);
1201 	DEV_UNLOCK(d);
1202 
1203 	if (d->d_devtounit) {
1204 		SDT_PROBE6(sdt, bdev, open, release,
1205 		    d, dev, flag, devtype, unit, dv);
1206 		device_release(dv);
1207 	}
1208 
1209 out:	bdevsw_release(d, lc);
1210 
1211 	return rv;
1212 }
1213 
1214 int
1215 bdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
1216 {
1217 	const struct bdevsw *d;
1218 	int rv, mpflag;
1219 
1220 	if ((d = bdevsw_lookup(dev)) == NULL)
1221 		return ENXIO;
1222 	if (d->d_cancel == NULL)
1223 		return ENODEV;
1224 
1225 	DEV_LOCK(d);
1226 	SDT_PROBE4(sdt, bdev, cancel, entry,  d, dev, flag, devtype);
1227 	rv = (*d->d_cancel)(dev, flag, devtype, l);
1228 	SDT_PROBE5(sdt, bdev, cancel, return,  d, dev, flag, devtype, rv);
1229 	DEV_UNLOCK(d);
1230 
1231 	return rv;
1232 }
1233 
1234 int
1235 bdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
1236 {
1237 	const struct bdevsw *d;
1238 	int rv, mpflag;
1239 
1240 	if ((d = bdevsw_lookup(dev)) == NULL)
1241 		return ENXIO;
1242 
1243 	DEV_LOCK(d);
1244 	SDT_PROBE4(sdt, bdev, close, entry,  d, dev, flag, devtype);
1245 	rv = (*d->d_close)(dev, flag, devtype, l);
1246 	SDT_PROBE5(sdt, bdev, close, return,  d, dev, flag, devtype, rv);
1247 	DEV_UNLOCK(d);
1248 
1249 	return rv;
1250 }
1251 
1252 SDT_PROVIDER_DECLARE(io);
1253 SDT_PROBE_DEFINE1(io, kernel, , start, "struct buf *"/*bp*/);
1254 
1255 void
1256 bdev_strategy(struct buf *bp)
1257 {
1258 	const struct bdevsw *d;
1259 	int mpflag;
1260 
1261 	SDT_PROBE1(io, kernel, , start, bp);
1262 
1263 	if ((d = bdevsw_lookup(bp->b_dev)) == NULL) {
1264 		bp->b_error = ENXIO;
1265 		bp->b_resid = bp->b_bcount;
1266 		biodone_vfs(bp); /* biodone() iff vfs present */
1267 		return;
1268 	}
1269 
1270 	DEV_LOCK(d);
1271 	SDT_PROBE3(sdt, bdev, strategy, entry,  d, bp->b_dev, bp);
1272 	(*d->d_strategy)(bp);
1273 	SDT_PROBE3(sdt, bdev, strategy, return,  d, bp->b_dev, bp);
1274 	DEV_UNLOCK(d);
1275 }
1276 
1277 int
1278 bdev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1279 {
1280 	const struct bdevsw *d;
1281 	int rv, mpflag;
1282 
1283 	if ((d = bdevsw_lookup(dev)) == NULL)
1284 		return ENXIO;
1285 
1286 	DEV_LOCK(d);
1287 	SDT_PROBE5(sdt, bdev, ioctl, entry,  d, dev, cmd, data, flag);
1288 	rv = (*d->d_ioctl)(dev, cmd, data, flag, l);
1289 	SDT_PROBE6(sdt, bdev, ioctl, return,  d, dev, cmd, data, flag, rv);
1290 	DEV_UNLOCK(d);
1291 
1292 	return rv;
1293 }
1294 
1295 int
1296 bdev_dump(dev_t dev, daddr_t addr, void *data, size_t sz)
1297 {
1298 	const struct bdevsw *d;
1299 	int rv;
1300 
1301 	/*
1302 	 * Dump can be called without the device open.  Since it can
1303 	 * currently only be called with the system paused (and in a
1304 	 * potentially unstable state), we don't perform any locking.
1305 	 */
1306 	if ((d = bdevsw_lookup(dev)) == NULL)
1307 		return ENXIO;
1308 
1309 	/* DEV_LOCK(d); */
1310 	rv = (*d->d_dump)(dev, addr, data, sz);
1311 	/* DEV_UNLOCK(d); */
1312 
1313 	return rv;
1314 }
1315 
1316 int
1317 bdev_flags(dev_t dev)
1318 {
1319 	const struct bdevsw *d;
1320 
1321 	if ((d = bdevsw_lookup(dev)) == NULL)
1322 		return 0;
1323 	return d->d_flag & ~D_TYPEMASK;
1324 }
1325 
1326 int
1327 bdev_type(dev_t dev)
1328 {
1329 	const struct bdevsw *d;
1330 
1331 	if ((d = bdevsw_lookup(dev)) == NULL)
1332 		return D_OTHER;
1333 	return d->d_flag & D_TYPEMASK;
1334 }
1335 
1336 int
1337 bdev_size(dev_t dev)
1338 {
1339 	const struct bdevsw *d;
1340 	int rv, mpflag = 0;
1341 
1342 	if ((d = bdevsw_lookup(dev)) == NULL ||
1343 	    d->d_psize == NULL)
1344 		return -1;
1345 
1346 	/*
1347 	 * Don't to try lock the device if we're dumping.
1348 	 * XXX: is there a better way to test this?
1349 	 */
1350 	if ((boothowto & RB_DUMP) == 0)
1351 		DEV_LOCK(d);
1352 	SDT_PROBE2(sdt, bdev, psize, entry,  d, dev);
1353 	rv = (*d->d_psize)(dev);
1354 	SDT_PROBE3(sdt, bdev, psize, return,  d, dev, rv);
1355 	if ((boothowto & RB_DUMP) == 0)
1356 		DEV_UNLOCK(d);
1357 
1358 	return rv;
1359 }
1360 
1361 int
1362 bdev_discard(dev_t dev, off_t pos, off_t len)
1363 {
1364 	const struct bdevsw *d;
1365 	int rv, mpflag;
1366 
1367 	if ((d = bdevsw_lookup(dev)) == NULL)
1368 		return ENXIO;
1369 
1370 	DEV_LOCK(d);
1371 	SDT_PROBE4(sdt, bdev, discard, entry,  d, dev, pos, len);
1372 	rv = (*d->d_discard)(dev, pos, len);
1373 	SDT_PROBE5(sdt, bdev, discard, return,  d, dev, pos, len, rv);
1374 	DEV_UNLOCK(d);
1375 
1376 	return rv;
1377 }
1378 
1379 void
1380 bdev_detached(dev_t dev)
1381 {
1382 	const struct bdevsw *d;
1383 	device_t dv;
1384 	int unit;
1385 
1386 	if ((d = bdevsw_lookup(dev)) == NULL)
1387 		return;
1388 	if (d->d_devtounit == NULL)
1389 		return;
1390 	if ((unit = (*d->d_devtounit)(dev)) == -1)
1391 		return;
1392 	if ((dv = device_lookup(d->d_cfdriver, unit)) == NULL)
1393 		return;
1394 	config_detach_commit(dv);
1395 }
1396 
1397 int
1398 cdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
1399 {
1400 	const struct cdevsw *d;
1401 	struct localcount *lc;
1402 	device_t dv = NULL/*XXXGCC*/;
1403 	int unit = -1/*XXXGCC*/, rv, mpflag;
1404 
1405 	d = cdevsw_lookup_acquire(dev, &lc);
1406 	if (d == NULL)
1407 		return ENXIO;
1408 
1409 	if (d->d_devtounit) {
1410 		/*
1411 		 * If the device node corresponds to an autoconf device
1412 		 * instance, acquire a reference to it so that during
1413 		 * d_open, device_lookup is stable.
1414 		 *
1415 		 * XXX This should also arrange to instantiate cloning
1416 		 * pseudo-devices if appropriate, but that requires
1417 		 * reviewing them all to find and verify a common
1418 		 * pattern.
1419 		 */
1420 		if ((unit = (*d->d_devtounit)(dev)) == -1) {
1421 			rv = ENXIO;
1422 			goto out;
1423 		}
1424 		if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) ==
1425 		    NULL) {
1426 			rv = ENXIO;
1427 			goto out;
1428 		}
1429 		SDT_PROBE6(sdt, cdev, open, acquire,
1430 		    d, dev, flag, devtype, unit, dv);
1431 	}
1432 
1433 	DEV_LOCK(d);
1434 	SDT_PROBE4(sdt, cdev, open, entry,  d, dev, flag, devtype);
1435 	rv = (*d->d_open)(dev, flag, devtype, l);
1436 	SDT_PROBE5(sdt, cdev, open, return,  d, dev, flag, devtype, rv);
1437 	DEV_UNLOCK(d);
1438 
1439 	if (d->d_devtounit) {
1440 		SDT_PROBE6(sdt, cdev, open, release,
1441 		    d, dev, flag, devtype, unit, dv);
1442 		device_release(dv);
1443 	}
1444 
1445 out:	cdevsw_release(d, lc);
1446 
1447 	return rv;
1448 }
1449 
1450 int
1451 cdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
1452 {
1453 	const struct cdevsw *d;
1454 	int rv, mpflag;
1455 
1456 	if ((d = cdevsw_lookup(dev)) == NULL)
1457 		return ENXIO;
1458 	if (d->d_cancel == NULL)
1459 		return ENODEV;
1460 
1461 	DEV_LOCK(d);
1462 	SDT_PROBE4(sdt, cdev, cancel, entry,  d, dev, flag, devtype);
1463 	rv = (*d->d_cancel)(dev, flag, devtype, l);
1464 	SDT_PROBE5(sdt, cdev, cancel, return,  d, dev, flag, devtype, rv);
1465 	DEV_UNLOCK(d);
1466 
1467 	return rv;
1468 }
1469 
1470 int
1471 cdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
1472 {
1473 	const struct cdevsw *d;
1474 	int rv, mpflag;
1475 
1476 	if ((d = cdevsw_lookup(dev)) == NULL)
1477 		return ENXIO;
1478 
1479 	DEV_LOCK(d);
1480 	SDT_PROBE4(sdt, cdev, close, entry,  d, dev, flag, devtype);
1481 	rv = (*d->d_close)(dev, flag, devtype, l);
1482 	SDT_PROBE5(sdt, cdev, close, return,  d, dev, flag, devtype, rv);
1483 	DEV_UNLOCK(d);
1484 
1485 	return rv;
1486 }
1487 
1488 int
1489 cdev_read(dev_t dev, struct uio *uio, int flag)
1490 {
1491 	const struct cdevsw *d;
1492 	int rv, mpflag;
1493 
1494 	if ((d = cdevsw_lookup(dev)) == NULL)
1495 		return ENXIO;
1496 
1497 	DEV_LOCK(d);
1498 	SDT_PROBE4(sdt, cdev, read, entry,  d, dev, uio, flag);
1499 	rv = (*d->d_read)(dev, uio, flag);
1500 	SDT_PROBE5(sdt, cdev, read, return,  d, dev, uio, flag, rv);
1501 	DEV_UNLOCK(d);
1502 
1503 	return rv;
1504 }
1505 
1506 int
1507 cdev_write(dev_t dev, struct uio *uio, int flag)
1508 {
1509 	const struct cdevsw *d;
1510 	int rv, mpflag;
1511 
1512 	if ((d = cdevsw_lookup(dev)) == NULL)
1513 		return ENXIO;
1514 
1515 	DEV_LOCK(d);
1516 	SDT_PROBE4(sdt, cdev, write, entry,  d, dev, uio, flag);
1517 	rv = (*d->d_write)(dev, uio, flag);
1518 	SDT_PROBE5(sdt, cdev, write, return,  d, dev, uio, flag, rv);
1519 	DEV_UNLOCK(d);
1520 
1521 	return rv;
1522 }
1523 
1524 int
1525 cdev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1526 {
1527 	const struct cdevsw *d;
1528 	int rv, mpflag;
1529 
1530 	if ((d = cdevsw_lookup(dev)) == NULL)
1531 		return ENXIO;
1532 
1533 	DEV_LOCK(d);
1534 	SDT_PROBE5(sdt, cdev, ioctl, entry,  d, dev, cmd, data, flag);
1535 	rv = (*d->d_ioctl)(dev, cmd, data, flag, l);
1536 	SDT_PROBE6(sdt, cdev, ioctl, return,  d, dev, cmd, data, flag, rv);
1537 	DEV_UNLOCK(d);
1538 
1539 	return rv;
1540 }
1541 
1542 void
1543 cdev_stop(struct tty *tp, int flag)
1544 {
1545 	const struct cdevsw *d;
1546 	int mpflag;
1547 
1548 	if ((d = cdevsw_lookup(tp->t_dev)) == NULL)
1549 		return;
1550 
1551 	DEV_LOCK(d);
1552 	SDT_PROBE4(sdt, cdev, stop, entry,  d, tp->t_dev, tp, flag);
1553 	(*d->d_stop)(tp, flag);
1554 	SDT_PROBE4(sdt, cdev, stop, return,  d, tp->t_dev, tp, flag);
1555 	DEV_UNLOCK(d);
1556 }
1557 
1558 struct tty *
1559 cdev_tty(dev_t dev)
1560 {
1561 	const struct cdevsw *d;
1562 
1563 	if ((d = cdevsw_lookup(dev)) == NULL)
1564 		return NULL;
1565 
1566 	/* XXX Check if necessary. */
1567 	if (d->d_tty == NULL)
1568 		return NULL;
1569 
1570 	return (*d->d_tty)(dev);
1571 }
1572 
1573 int
1574 cdev_poll(dev_t dev, int flag, lwp_t *l)
1575 {
1576 	const struct cdevsw *d;
1577 	int rv, mpflag;
1578 
1579 	if ((d = cdevsw_lookup(dev)) == NULL)
1580 		return POLLERR;
1581 
1582 	DEV_LOCK(d);
1583 	SDT_PROBE3(sdt, cdev, poll, entry,  d, dev, flag);
1584 	rv = (*d->d_poll)(dev, flag, l);
1585 	SDT_PROBE4(sdt, cdev, poll, return,  d, dev, flag, rv);
1586 	DEV_UNLOCK(d);
1587 
1588 	return rv;
1589 }
1590 
1591 paddr_t
1592 cdev_mmap(dev_t dev, off_t off, int flag)
1593 {
1594 	const struct cdevsw *d;
1595 	paddr_t rv;
1596 	int mpflag;
1597 
1598 	if ((d = cdevsw_lookup(dev)) == NULL)
1599 		return (paddr_t)-1LL;
1600 
1601 	DEV_LOCK(d);
1602 	SDT_PROBE4(sdt, cdev, mmap, entry,  d, dev, off, flag);
1603 	rv = (*d->d_mmap)(dev, off, flag);
1604 	SDT_PROBE5(sdt, cdev, mmap, return,  d, dev, off, flag, rv);
1605 	DEV_UNLOCK(d);
1606 
1607 	return rv;
1608 }
1609 
1610 int
1611 cdev_kqfilter(dev_t dev, struct knote *kn)
1612 {
1613 	const struct cdevsw *d;
1614 	int rv, mpflag;
1615 
1616 	if ((d = cdevsw_lookup(dev)) == NULL)
1617 		return ENXIO;
1618 
1619 	DEV_LOCK(d);
1620 	SDT_PROBE3(sdt, cdev, kqfilter, entry,  d, dev, kn);
1621 	rv = (*d->d_kqfilter)(dev, kn);
1622 	SDT_PROBE4(sdt, cdev, kqfilter, return,  d, dev, kn, rv);
1623 	DEV_UNLOCK(d);
1624 
1625 	return rv;
1626 }
1627 
1628 int
1629 cdev_discard(dev_t dev, off_t pos, off_t len)
1630 {
1631 	const struct cdevsw *d;
1632 	int rv, mpflag;
1633 
1634 	if ((d = cdevsw_lookup(dev)) == NULL)
1635 		return ENXIO;
1636 
1637 	DEV_LOCK(d);
1638 	SDT_PROBE4(sdt, cdev, discard, entry,  d, dev, pos, len);
1639 	rv = (*d->d_discard)(dev, pos, len);
1640 	SDT_PROBE5(sdt, cdev, discard, return,  d, dev, pos, len, rv);
1641 	DEV_UNLOCK(d);
1642 
1643 	return rv;
1644 }
1645 
1646 int
1647 cdev_flags(dev_t dev)
1648 {
1649 	const struct cdevsw *d;
1650 
1651 	if ((d = cdevsw_lookup(dev)) == NULL)
1652 		return 0;
1653 	return d->d_flag & ~D_TYPEMASK;
1654 }
1655 
1656 int
1657 cdev_type(dev_t dev)
1658 {
1659 	const struct cdevsw *d;
1660 
1661 	if ((d = cdevsw_lookup(dev)) == NULL)
1662 		return D_OTHER;
1663 	return d->d_flag & D_TYPEMASK;
1664 }
1665 
1666 void
1667 cdev_detached(dev_t dev)
1668 {
1669 	const struct cdevsw *d;
1670 	device_t dv;
1671 	int unit;
1672 
1673 	if ((d = cdevsw_lookup(dev)) == NULL)
1674 		return;
1675 	if (d->d_devtounit == NULL)
1676 		return;
1677 	if ((unit = (*d->d_devtounit)(dev)) == -1)
1678 		return;
1679 	if ((dv = device_lookup(d->d_cfdriver, unit)) == NULL)
1680 		return;
1681 	config_detach_commit(dv);
1682 }
1683 
1684 /*
1685  * nommap(dev, off, prot)
1686  *
1687  *	mmap routine that always fails, for non-mmappable devices.
1688  */
1689 paddr_t
1690 nommap(dev_t dev, off_t off, int prot)
1691 {
1692 
1693 	return (paddr_t)-1;
1694 }
1695 
/*
 * dev_minor_unit(dev)
 *
 *	Yield the minor number of dev, converted to int.  Meant to be
 *	plugged into the d_devtounit member of struct bdevsw or struct
 *	cdevsw by drivers whose /dev nodes name an autoconf instance
 *	directly by minor number.
 */
int
dev_minor_unit(dev_t dev)
{
	const int unit = minor(dev);

	return unit;
}
1710