xref: /dflybsd-src/sys/kern/kern_device.c (revision 3e82b46c18bc48fdb3c1d60729c7661b3a0bf6bf)
1 /*
2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
5  *							All rights reserved.
6  * Copyright (c) 1982, 1986, 1991, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
31  */
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/vnode.h>
43 #include <sys/queue.h>
44 #include <sys/device.h>
45 #include <sys/tree.h>
46 #include <sys/syslink_rpc.h>
47 #include <sys/proc.h>
48 #include <machine/stdarg.h>
49 #include <sys/thread2.h>
50 #include <vfs/devfs/devfs.h>
51 
/*
 * system link descriptors identify the command in the
 * arguments structure.
 *
 * One descriptor is defined below for every device operation.  Each one
 * records the byte offset of the operation's d_<name> member within
 * struct dev_ops together with the operation name as a string.
 */
/* DDESCNAME(open) expands to the identifier dev_open_desc. */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

/*
 * Define the descriptor for operation 'name'.  The first initializer is
 * the offset of d_<name> in struct dev_ops, the second the stringified
 * operation name.
 */
#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),	\
	    #name }

/* Instantiate dev_<op>_desc for every operation. */
DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(poll);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);
77 
/*
 * Misc default ops
 */
/*
 * File-scope ops structure with no explicit initializer.  dev_ops_remove()
 * compares against its address and skips destroy_all_devs() for it.
 */
struct dev_ops dead_dev_ops;

/*
 * Fallback operation table.  compile_dev_ops() copies entries from here
 * into any slot a driver left NULL when the driver supplies no d_default
 * handler of its own.
 */
struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};
100 
101 /************************************************************************
102  *			GENERAL DEVICE API FUNCTIONS			*
103  ************************************************************************/
104 
105 int
106 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
107 {
108 	struct dev_open_args ap;
109 
110 	ap.a_head.a_desc = &dev_open_desc;
111 	ap.a_head.a_dev = dev;
112 	ap.a_oflags = oflags;
113 	ap.a_devtype = devtype;
114 	ap.a_cred = cred;
115 	return(dev->si_ops->d_open(&ap));
116 }
117 
118 int
119 dev_dclose(cdev_t dev, int fflag, int devtype)
120 {
121 	struct dev_close_args ap;
122 
123 	ap.a_head.a_desc = &dev_close_desc;
124 	ap.a_head.a_dev = dev;
125 	ap.a_fflag = fflag;
126 	ap.a_devtype = devtype;
127 	return(dev->si_ops->d_close(&ap));
128 }
129 
130 int
131 dev_dread(cdev_t dev, struct uio *uio, int ioflag)
132 {
133 	struct dev_read_args ap;
134 	int error;
135 
136 	ap.a_head.a_desc = &dev_read_desc;
137 	ap.a_head.a_dev = dev;
138 	ap.a_uio = uio;
139 	ap.a_ioflag = ioflag;
140 	error = dev->si_ops->d_read(&ap);
141 	if (error == 0)
142 		dev->si_lastread = time_second;
143 	return (error);
144 }
145 
146 int
147 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
148 {
149 	struct dev_write_args ap;
150 	int error;
151 
152 	dev->si_lastwrite = time_second;
153 	ap.a_head.a_desc = &dev_write_desc;
154 	ap.a_head.a_dev = dev;
155 	ap.a_uio = uio;
156 	ap.a_ioflag = ioflag;
157 	error = dev->si_ops->d_write(&ap);
158 	return (error);
159 }
160 
161 int
162 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred)
163 {
164 	struct dev_ioctl_args ap;
165 
166 	ap.a_head.a_desc = &dev_ioctl_desc;
167 	ap.a_head.a_dev = dev;
168 	ap.a_cmd = cmd;
169 	ap.a_data = data;
170 	ap.a_fflag = fflag;
171 	ap.a_cred = cred;
172 	return(dev->si_ops->d_ioctl(&ap));
173 }
174 
175 int
176 dev_dpoll(cdev_t dev, int events)
177 {
178 	struct dev_poll_args ap;
179 	int error;
180 
181 	ap.a_head.a_desc = &dev_poll_desc;
182 	ap.a_head.a_dev = dev;
183 	ap.a_events = events;
184 	error = dev->si_ops->d_poll(&ap);
185 	if (error == 0)
186 		return(ap.a_events);
187 	return (seltrue(dev, events));
188 }
189 
190 int
191 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
192 {
193 	struct dev_mmap_args ap;
194 	int error;
195 
196 	ap.a_head.a_desc = &dev_mmap_desc;
197 	ap.a_head.a_dev = dev;
198 	ap.a_offset = offset;
199 	ap.a_nprot = nprot;
200 	error = dev->si_ops->d_mmap(&ap);
201 	if (error == 0)
202 		return(ap.a_result);
203 	return(-1);
204 }
205 
206 int
207 dev_dclone(cdev_t dev)
208 {
209 	struct dev_clone_args ap;
210 
211 	ap.a_head.a_desc = &dev_clone_desc;
212 	ap.a_head.a_dev = dev;
213 	return (dev->si_ops->d_clone(&ap));
214 }
215 
216 int
217 dev_drevoke(cdev_t dev)
218 {
219 	struct dev_revoke_args ap;
220 
221 	ap.a_head.a_desc = &dev_revoke_desc;
222 	ap.a_head.a_dev = dev;
223 	return (dev->si_ops->d_revoke(&ap));
224 }
225 
226 /*
227  * Core device strategy call, used to issue I/O on a device.  There are
228  * two versions, a non-chained version and a chained version.  The chained
229  * version reuses a BIO set up by vn_strategy().  The only difference is
230  * that, for now, we do not push a new tracking structure when chaining
231  * from vn_strategy.  XXX this will ultimately have to change.
232  */
233 void
234 dev_dstrategy(cdev_t dev, struct bio *bio)
235 {
236 	struct dev_strategy_args ap;
237 	struct bio_track *track;
238 
239 	ap.a_head.a_desc = &dev_strategy_desc;
240 	ap.a_head.a_dev = dev;
241 	ap.a_bio = bio;
242 
243 	KKASSERT(bio->bio_track == NULL);
244 	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
245 	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
246 	    track = &dev->si_track_read;
247 	else
248 	    track = &dev->si_track_write;
249 	bio_track_ref(track);
250 	bio->bio_track = track;
251 	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
252 	(void)dev->si_ops->d_strategy(&ap);
253 }
254 
255 void
256 dev_dstrategy_chain(cdev_t dev, struct bio *bio)
257 {
258 	struct dev_strategy_args ap;
259 
260 	ap.a_head.a_desc = &dev_strategy_desc;
261 	ap.a_head.a_dev = dev;
262 	ap.a_bio = bio;
263 
264 	KKASSERT(bio->bio_track != NULL);
265 	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
266 	(void)dev->si_ops->d_strategy(&ap);
267 }
268 
269 /*
270  * note: the disk layer is expected to set count, blkno, and secsize before
271  * forwarding the message.
272  */
273 int
274 dev_ddump(cdev_t dev)
275 {
276 	struct dev_dump_args ap;
277 
278 	ap.a_head.a_desc = &dev_dump_desc;
279 	ap.a_head.a_dev = dev;
280 	ap.a_count = 0;
281 	ap.a_blkno = 0;
282 	ap.a_secsize = 0;
283 	return(dev->si_ops->d_dump(&ap));
284 }
285 
286 int64_t
287 dev_dpsize(cdev_t dev)
288 {
289 	struct dev_psize_args ap;
290 	int error;
291 
292 	ap.a_head.a_desc = &dev_psize_desc;
293 	ap.a_head.a_dev = dev;
294 	error = dev->si_ops->d_psize(&ap);
295 	if (error == 0)
296 		return (ap.a_result);
297 	return(-1);
298 }
299 
300 int
301 dev_dkqfilter(cdev_t dev, struct knote *kn)
302 {
303 	struct dev_kqfilter_args ap;
304 	int error;
305 
306 	ap.a_head.a_desc = &dev_kqfilter_desc;
307 	ap.a_head.a_dev = dev;
308 	ap.a_kn = kn;
309 	error = dev->si_ops->d_kqfilter(&ap);
310 	if (error == 0)
311 		return(ap.a_result);
312 	return(ENODEV);
313 }
314 
315 /************************************************************************
316  *			DEVICE HELPER FUNCTIONS				*
317  ************************************************************************/
318 
319 /*
320  * MPSAFE
321  */
322 int
323 dev_drefs(cdev_t dev)
324 {
325     return(dev->si_sysref.refcnt);
326 }
327 
328 /*
329  * MPSAFE
330  */
331 const char *
332 dev_dname(cdev_t dev)
333 {
334     return(dev->si_ops->head.name);
335 }
336 
337 /*
338  * MPSAFE
339  */
340 int
341 dev_dflags(cdev_t dev)
342 {
343     return(dev->si_ops->head.flags);
344 }
345 
346 /*
347  * MPSAFE
348  */
349 int
350 dev_dmaj(cdev_t dev)
351 {
352     return(dev->si_ops->head.maj);
353 }
354 
355 /*
356  * Used when forwarding a request through layers.  The caller adjusts
357  * ap->a_head.a_dev and then calls this function.
358  */
359 int
360 dev_doperate(struct dev_generic_args *ap)
361 {
362     int (*func)(struct dev_generic_args *);
363 
364     func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
365     return (func(ap));
366 }
367 
368 /*
369  * Used by the console intercept code only.  Issue an operation through
370  * a foreign ops structure allowing the ops structure associated
371  * with the device to remain intact.
372  */
373 int
374 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
375 {
376     int (*func)(struct dev_generic_args *);
377 
378     func = *(void **)((char *)ops + ap->a_desc->sd_offset);
379     return (func(ap));
380 }
381 
382 /*
383  * Convert a template dev_ops into the real thing by filling in
384  * uninitialized fields.
385  */
386 void
387 compile_dev_ops(struct dev_ops *ops)
388 {
389 	int offset;
390 
391 	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
392 	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
393 	     offset += sizeof(void *)
394 	) {
395 		void **func_p = (void **)((char *)ops + offset);
396 		void **def_p = (void **)((char *)&default_dev_ops + offset);
397 		if (*func_p == NULL) {
398 			if (ops->d_default)
399 				*func_p = ops->d_default;
400 			else
401 				*func_p = *def_p;
402 		}
403 	}
404 }
405 
406 /************************************************************************
407  *			MAJOR/MINOR SPACE FUNCTION 			*
408  ************************************************************************/
409 
/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */
424 
425 static
426 int
427 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
428 {
429     if (a->maj < b->maj)
430 	return(-1);
431     else if (a->maj > b->maj)
432 	return(1);
433     return(0);
434 }
435 
/*
 * Generate the RB tree functions for struct dev_ops_maj nodes linked
 * through 'rbnode', ordered by rb_dev_ops_compare() and looked up by the
 * int 'maj' field.
 */
RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

/* Root of the per-major tree; searched by dev_ops_remove(). */
struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);
439 
440 /*
441  * Remove all matching dev_ops entries from the dev_ops_array[] major
442  * array so no new user opens can be performed, and destroy all devices
443  * installed in the hash table that are associated with this dev_ops.  (see
444  * destroy_all_devs()).
445  */
446 int
447 dev_ops_remove(struct dev_ops *ops, u_int mask, u_int match)
448 {
449 	struct dev_ops_maj *rbmaj;
450 	struct dev_ops_link *link;
451 	struct dev_ops_link **plink;
452 
453 	if (ops != &dead_dev_ops)
454 		destroy_all_devs(ops, mask, match);
455 
456 	rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, ops->head.maj);
457 	if (rbmaj == NULL) {
458 		kprintf("double-remove of dev_ops %p for %s(%d)\n",
459 			ops, ops->head.name, ops->head.maj);
460 		return(0);
461 	}
462 	for (plink = &rbmaj->link; (link = *plink) != NULL;
463 	     plink = &link->next) {
464 		if (link->mask == mask && link->match == match) {
465 			if (link->ops == ops)
466 				break;
467 			kprintf("%s: ERROR: cannot remove dev_ops, "
468 			       "its major number %d was stolen by %s\n",
469 				ops->head.name, ops->head.maj,
470 				link->ops->head.name
471 			);
472 		}
473 	}
474 	if (link == NULL) {
475 		kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
476 		       "multiple times!\n",
477 		       ops->head.name, ops->head.maj, mask, match);
478 	} else {
479 		*plink = link->next;
480 		--ops->head.refs; /* XXX ops_release() / record refs */
481 		kfree(link, M_DEVBUF);
482 	}
483 
484 	/*
485 	 * Scrap the RB tree node for the major number if no ops are
486 	 * installed any longer.
487 	 */
488 	if (rbmaj->link == NULL) {
489 		dev_ops_rb_tree_RB_REMOVE(&dev_ops_rbhead, rbmaj);
490 		kfree(rbmaj, M_DEVBUF);
491 	}
492 
493 #if 0
494 	/*
495 	 * The same ops might be used with multiple devices, so don't
496 	 * complain if the ref count is non-zero.
497 	 */
498 	if (ops->head.refs != 0) {
499 		kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
500 			"while %d device refs still exist!\n",
501 			ops->head.name, ops->head.maj, mask, match,
502 			ops->head.refs);
503 	} else {
504 		if (bootverbose)
505 			kprintf("%s: ops removed\n", ops->head.name);
506 	}
507 #endif
508 	return 0;
509 }
510 
/*
 * Destroy the devices belonging to 'ops' by calling
 * devfs_destroy_dev_by_ops() with a minor of -1, and return its result.
 */
int
dev_ops_remove_all(struct dev_ops *ops)
{
	int rc;

	rc = devfs_destroy_dev_by_ops(ops, -1);
	return (rc);
}
515 
/*
 * Destroy the device of 'ops' with the given minor number by calling
 * devfs_destroy_dev_by_ops(), and return its result.
 */
int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	int rc;

	rc = devfs_destroy_dev_by_ops(ops, minor);
	return (rc);
}
520 
/*
 * Release an ops entry.
 *
 * This function is currently a deliberate no-op: it does not touch
 * ops->head.refs.  The previous body returned unconditionally ahead of
 * an unreachable decrement-and-test of head.refs; that dead code has
 * been dropped so the function reads as what it actually does.
 *
 * XXX the intended behavior is to drop a reference and, when the ref
 * count reaches zero, recurse through the stack.
 */
void
dev_ops_release(struct dev_ops *ops)
{
	/* XXX ref counting is not implemented here; intentionally empty */
	(void)ops;
}
534 
535 struct dev_ops *
536 dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
537 {
538 	struct dev_ops *oops = dev->si_ops;
539 
540 	compile_dev_ops(iops);
541 	iops->head.maj = oops->head.maj;
542 	iops->head.data = oops->head.data;
543 	iops->head.flags = oops->head.flags;
544 	dev->si_ops = iops;
545 	dev->si_flags |= SI_INTERCEPTED;
546 
547 	return (oops);
548 }
549 
550 void
551 dev_ops_restore(cdev_t dev, struct dev_ops *oops)
552 {
553 	struct dev_ops *iops = dev->si_ops;
554 
555 	dev->si_ops = oops;
556 	dev->si_flags &= ~SI_INTERCEPTED;
557 	iops->head.maj = 0;
558 	iops->head.data = NULL;
559 	iops->head.flags = 0;
560 }
561 
562 /************************************************************************
563  *			DEFAULT DEV OPS FUNCTIONS			*
564  ************************************************************************/
565 
566 
/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */

/*
 * Default revoke handler: takes no action and reports success.
 */
int
norevoke(struct dev_revoke_args *ap)
{
	(void)ap;
	return (0);
}
577 
/*
 * Default clone handler: takes no action and returns 0, which allows
 * the clone.
 */
int
noclone(struct dev_clone_args *ap)
{
	(void)ap;
	return (0);
}
584 
/*
 * Default open handler: always fails with ENODEV.
 */
int
noopen(struct dev_open_args *ap)
{
	(void)ap;
	return (ENODEV);
}
590 
/*
 * Default close handler: always fails with ENODEV.
 */
int
noclose(struct dev_close_args *ap)
{
	(void)ap;
	return (ENODEV);
}
596 
/*
 * Default read handler: always fails with ENODEV.
 */
int
noread(struct dev_read_args *ap)
{
	(void)ap;
	return (ENODEV);
}
602 
/*
 * Default write handler: always fails with ENODEV.
 */
int
nowrite(struct dev_write_args *ap)
{
	(void)ap;
	return (ENODEV);
}
608 
/*
 * Default ioctl handler: always fails with ENODEV.
 */
int
noioctl(struct dev_ioctl_args *ap)
{
	(void)ap;
	return (ENODEV);
}
614 
/*
 * Default kqfilter handler: always fails with ENODEV.
 */
int
nokqfilter(struct dev_kqfilter_args *ap)
{
	(void)ap;
	return (ENODEV);
}
620 
/*
 * Default mmap handler: always fails with ENODEV.
 */
int
nommap(struct dev_mmap_args *ap)
{
	(void)ap;
	return (ENODEV);
}
626 
627 int
628 nopoll(struct dev_poll_args *ap)
629 {
630 	ap->a_events = 0;
631 	return(0);
632 }
633 
634 int
635 nostrategy(struct dev_strategy_args *ap)
636 {
637 	struct bio *bio = ap->a_bio;
638 
639 	bio->bio_buf->b_flags |= B_ERROR;
640 	bio->bio_buf->b_error = EOPNOTSUPP;
641 	biodone(bio);
642 	return(0);
643 }
644 
645 int
646 nopsize(struct dev_psize_args *ap)
647 {
648 	ap->a_result = 0;
649 	return(0);
650 }
651 
/*
 * Default dump handler: always fails with ENODEV.
 */
int
nodump(struct dev_dump_args *ap)
{
	(void)ap;
	return (ENODEV);
}
657 
/*
 * Open handler that accepts every open unconditionally by returning 0.
 *
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	(void)ap;
	return (0);
}
667 
/*
 * Close handler that accepts every close unconditionally by returning 0.
 */
int
nullclose(struct dev_close_args *ap)
{
	(void)ap;
	return (0);
}
673 
674