xref: /dflybsd-src/sys/kern/kern_device.c (revision ee65b806ac08b188bcab21ef0f1efda2cd5bdef7)
1 /*
2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4  * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
5  *							All rights reserved.
6  * Copyright (c) 1982, 1986, 1991, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
31  */
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/vnode.h>
43 #include <sys/queue.h>
44 #include <sys/device.h>
45 #include <sys/tree.h>
46 #include <sys/syslink_rpc.h>
47 #include <sys/proc.h>
48 #include <machine/stdarg.h>
49 #include <sys/thread2.h>
50 #include <sys/devfs.h>
51 #include <sys/dsched.h>
52 
/*
 * System link descriptors identify the command in the arguments
 * structure.  Each descriptor records the byte offset of its operation
 * within struct dev_ops plus the operation's name, which allows generic
 * offset-based dispatch (see dev_doperate() / dev_doperate_ops()).
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),	\
	    #name }

/* One descriptor per device operation slot in struct dev_ops */
DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(poll);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);
78 
/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;	/* zero-filled; used for dead/revoked devices */

/*
 * Fallback ops vector.  compile_dev_ops() copies these entries into any
 * slot a driver template leaves NULL when the template supplies no
 * d_default of its own.
 */
struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};
101 
102 /************************************************************************
103  *			GENERAL DEVICE API FUNCTIONS			*
104  ************************************************************************/
105 
106 int
107 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
108 {
109 	struct dev_open_args ap;
110 
111 	ap.a_head.a_desc = &dev_open_desc;
112 	ap.a_head.a_dev = dev;
113 	ap.a_oflags = oflags;
114 	ap.a_devtype = devtype;
115 	ap.a_cred = cred;
116 	return(dev->si_ops->d_open(&ap));
117 }
118 
119 int
120 dev_dclose(cdev_t dev, int fflag, int devtype)
121 {
122 	struct dev_close_args ap;
123 
124 	ap.a_head.a_desc = &dev_close_desc;
125 	ap.a_head.a_dev = dev;
126 	ap.a_fflag = fflag;
127 	ap.a_devtype = devtype;
128 	return(dev->si_ops->d_close(&ap));
129 }
130 
131 int
132 dev_dread(cdev_t dev, struct uio *uio, int ioflag)
133 {
134 	struct dev_read_args ap;
135 	int error;
136 
137 	ap.a_head.a_desc = &dev_read_desc;
138 	ap.a_head.a_dev = dev;
139 	ap.a_uio = uio;
140 	ap.a_ioflag = ioflag;
141 	error = dev->si_ops->d_read(&ap);
142 	if (error == 0)
143 		dev->si_lastread = time_second;
144 	return (error);
145 }
146 
147 int
148 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
149 {
150 	struct dev_write_args ap;
151 	int error;
152 
153 	dev->si_lastwrite = time_second;
154 	ap.a_head.a_desc = &dev_write_desc;
155 	ap.a_head.a_dev = dev;
156 	ap.a_uio = uio;
157 	ap.a_ioflag = ioflag;
158 	error = dev->si_ops->d_write(&ap);
159 	return (error);
160 }
161 
162 int
163 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
164 	   struct sysmsg *msg)
165 {
166 	struct dev_ioctl_args ap;
167 
168 	ap.a_head.a_desc = &dev_ioctl_desc;
169 	ap.a_head.a_dev = dev;
170 	ap.a_cmd = cmd;
171 	ap.a_data = data;
172 	ap.a_fflag = fflag;
173 	ap.a_cred = cred;
174 	ap.a_sysmsg = msg;
175 	return(dev->si_ops->d_ioctl(&ap));
176 }
177 
178 int
179 dev_dpoll(cdev_t dev, int events)
180 {
181 	struct dev_poll_args ap;
182 	int error;
183 
184 	ap.a_head.a_desc = &dev_poll_desc;
185 	ap.a_head.a_dev = dev;
186 	ap.a_events = events;
187 	error = dev->si_ops->d_poll(&ap);
188 	if (error == 0)
189 		return(ap.a_events);
190 	return (seltrue(dev, events));
191 }
192 
193 int
194 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
195 {
196 	struct dev_mmap_args ap;
197 	int error;
198 
199 	ap.a_head.a_desc = &dev_mmap_desc;
200 	ap.a_head.a_dev = dev;
201 	ap.a_offset = offset;
202 	ap.a_nprot = nprot;
203 	error = dev->si_ops->d_mmap(&ap);
204 	if (error == 0)
205 		return(ap.a_result);
206 	return(-1);
207 }
208 
209 int
210 dev_dclone(cdev_t dev)
211 {
212 	struct dev_clone_args ap;
213 
214 	ap.a_head.a_desc = &dev_clone_desc;
215 	ap.a_head.a_dev = dev;
216 	return (dev->si_ops->d_clone(&ap));
217 }
218 
219 int
220 dev_drevoke(cdev_t dev)
221 {
222 	struct dev_revoke_args ap;
223 
224 	ap.a_head.a_desc = &dev_revoke_desc;
225 	ap.a_head.a_dev = dev;
226 	return (dev->si_ops->d_revoke(&ap));
227 }
228 
/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/*
	 * A non-chained bio must not already be tracked, and the buffer
	 * must still represent a live command.
	 */
	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
	    track = &dev->si_track_read;
	else
	    track = &dev->si_track_write;
	/* reference the track before publishing it on the bio */
	bio_track_ref(track);
	bio->bio_track = track;

	/* hand the buffer to the I/O scheduler if it has not seen it yet */
	if (dsched_is_clear_buf_priv(bio->bio_buf))
		dsched_new_buf(bio->bio_buf);

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	(void)dev->si_ops->d_strategy(&ap);
}
261 
262 void
263 dev_dstrategy_chain(cdev_t dev, struct bio *bio)
264 {
265 	struct dev_strategy_args ap;
266 
267 	ap.a_head.a_desc = &dev_strategy_desc;
268 	ap.a_head.a_dev = dev;
269 	ap.a_bio = bio;
270 
271 	KKASSERT(bio->bio_track != NULL);
272 	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
273 	(void)dev->si_ops->d_strategy(&ap);
274 }
275 
276 /*
277  * note: the disk layer is expected to set count, blkno, and secsize before
278  * forwarding the message.
279  */
280 int
281 dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
282     size_t length)
283 {
284 	struct dev_dump_args ap;
285 
286 	ap.a_head.a_desc = &dev_dump_desc;
287 	ap.a_head.a_dev = dev;
288 	ap.a_count = 0;
289 	ap.a_blkno = 0;
290 	ap.a_secsize = 0;
291 	ap.a_virtual = virtual;
292 	ap.a_physical = physical;
293 	ap.a_offset = offset;
294 	ap.a_length = length;
295 	return(dev->si_ops->d_dump(&ap));
296 }
297 
298 int64_t
299 dev_dpsize(cdev_t dev)
300 {
301 	struct dev_psize_args ap;
302 	int error;
303 
304 	ap.a_head.a_desc = &dev_psize_desc;
305 	ap.a_head.a_dev = dev;
306 	error = dev->si_ops->d_psize(&ap);
307 	if (error == 0)
308 		return (ap.a_result);
309 	return(-1);
310 }
311 
312 int
313 dev_dkqfilter(cdev_t dev, struct knote *kn)
314 {
315 	struct dev_kqfilter_args ap;
316 	int error;
317 
318 	ap.a_head.a_desc = &dev_kqfilter_desc;
319 	ap.a_head.a_dev = dev;
320 	ap.a_kn = kn;
321 	error = dev->si_ops->d_kqfilter(&ap);
322 	if (error == 0)
323 		return(ap.a_result);
324 	return(ENODEV);
325 }
326 
327 /************************************************************************
328  *			DEVICE HELPER FUNCTIONS				*
329  ************************************************************************/
330 
331 /*
332  * MPSAFE
333  */
334 int
335 dev_drefs(cdev_t dev)
336 {
337     return(dev->si_sysref.refcnt);
338 }
339 
340 /*
341  * MPSAFE
342  */
343 const char *
344 dev_dname(cdev_t dev)
345 {
346     return(dev->si_ops->head.name);
347 }
348 
349 /*
350  * MPSAFE
351  */
352 int
353 dev_dflags(cdev_t dev)
354 {
355     return(dev->si_ops->head.flags);
356 }
357 
358 /*
359  * MPSAFE
360  */
361 int
362 dev_dmaj(cdev_t dev)
363 {
364     return(dev->si_ops->head.maj);
365 }
366 
367 /*
368  * Used when forwarding a request through layers.  The caller adjusts
369  * ap->a_head.a_dev and then calls this function.
370  */
371 int
372 dev_doperate(struct dev_generic_args *ap)
373 {
374     int (*func)(struct dev_generic_args *);
375 
376     func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
377     return (func(ap));
378 }
379 
380 /*
381  * Used by the console intercept code only.  Issue an operation through
382  * a foreign ops structure allowing the ops structure associated
383  * with the device to remain intact.
384  */
385 int
386 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
387 {
388     int (*func)(struct dev_generic_args *);
389 
390     func = *(void **)((char *)ops + ap->a_desc->sd_offset);
391     return (func(ap));
392 }
393 
394 /*
395  * Convert a template dev_ops into the real thing by filling in
396  * uninitialized fields.
397  */
398 void
399 compile_dev_ops(struct dev_ops *ops)
400 {
401 	int offset;
402 
403 	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
404 	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
405 	     offset += sizeof(void *)
406 	) {
407 		void **func_p = (void **)((char *)ops + offset);
408 		void **def_p = (void **)((char *)&default_dev_ops + offset);
409 		if (*func_p == NULL) {
410 			if (ops->d_default)
411 				*func_p = ops->d_default;
412 			else
413 				*func_p = *def_p;
414 		}
415 	}
416 }
417 
418 /************************************************************************
419  *			MAJOR/MINOR SPACE FUNCTION 			*
420  ************************************************************************/
421 
422 /*
423  * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
424  *
425  * Disk devices typically register their major, e.g. 'ad0', and then call
426  * into the disk label management code which overloads its own onto e.g. 'ad0'
427  * to support all the various slice and partition combinations.
428  *
429  * The mask/match supplied in this call are a full 32 bits and the same
430  * mask and match must be specified in a later dev_ops_remove() call to
431  * match this add.  However, the match value for the minor number should never
432  * have any bits set in the major number's bit range (8-15).  The mask value
433  * may be conveniently specified as -1 without creating any major number
434  * interference.
435  */
436 
437 static
438 int
439 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
440 {
441     if (a->maj < b->maj)
442 	return(-1);
443     else if (a->maj > b->maj)
444 	return(1);
445     return(0);
446 }
447 
/* Generate the red-black tree support functions, keyed on major number */
RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

/* Head of the tree tracking registered dev_ops entries by major number */
struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);
451 
/*
 * Destroy all devfs device nodes associated with the given ops vector.
 */
int
dev_ops_remove_all(struct dev_ops *ops)
{
	return (devfs_destroy_dev_by_ops(ops, -1));
}
457 
/*
 * Destroy the devfs device nodes matching the given ops vector and
 * minor number.
 */
int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return (devfs_destroy_dev_by_ops(ops, minor));
}
463 
464 struct dev_ops *
465 dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
466 {
467 	struct dev_ops *oops = dev->si_ops;
468 
469 	compile_dev_ops(iops);
470 	iops->head.maj = oops->head.maj;
471 	iops->head.data = oops->head.data;
472 	iops->head.flags = oops->head.flags;
473 	dev->si_ops = iops;
474 	dev->si_flags |= SI_INTERCEPTED;
475 
476 	return (oops);
477 }
478 
479 void
480 dev_ops_restore(cdev_t dev, struct dev_ops *oops)
481 {
482 	struct dev_ops *iops = dev->si_ops;
483 
484 	dev->si_ops = oops;
485 	dev->si_flags &= ~SI_INTERCEPTED;
486 	iops->head.maj = 0;
487 	iops->head.data = NULL;
488 	iops->head.flags = 0;
489 }
490 
491 /************************************************************************
492  *			DEFAULT DEV OPS FUNCTIONS			*
493  ************************************************************************/
494 
495 
496 /*
497  * Unsupported devswitch functions (e.g. for writing to read-only device).
498  * XXX may belong elsewhere.
499  */
/* Default d_revoke: take no action and report success. */
int
norevoke(struct dev_revoke_args *ap)
{
	return (0);
}
506 
/* Default d_clone: take no action; returning 0 allows the clone. */
int
noclone(struct dev_clone_args *ap)
{
	return (0);
}
513 
/* Default d_open: device cannot be opened. */
int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

/* Default d_close: device cannot be closed. */
int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

/* Default d_read: device does not support reading. */
int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

/* Default d_write: device does not support writing. */
int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

/* Default d_ioctl: device supports no ioctls. */
int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

/* Default d_kqfilter: device supports no kqueue filters. */
int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

/* Default d_mmap: device cannot be memory-mapped. */
int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}
555 
556 int
557 nopoll(struct dev_poll_args *ap)
558 {
559 	ap->a_events = 0;
560 	return(0);
561 }
562 
563 int
564 nostrategy(struct dev_strategy_args *ap)
565 {
566 	struct bio *bio = ap->a_bio;
567 
568 	bio->bio_buf->b_flags |= B_ERROR;
569 	bio->bio_buf->b_error = EOPNOTSUPP;
570 	biodone(bio);
571 	return(0);
572 }
573 
574 int
575 nopsize(struct dev_psize_args *ap)
576 {
577 	ap->a_result = 0;
578 	return(0);
579 }
580 
/* Default d_dump: kernel core dumps are not supported on this device. */
int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}
586 
/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
/* Unconditionally successful d_open. */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

/* Unconditionally successful d_close. */
int
nullclose(struct dev_close_args *ap)
{
	return (0);
}
602 
603