1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * Copyright (c) 1982, 1986, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 * 30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $ 31 */ 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/sysctl.h> 36 #include <sys/systm.h> 37 #include <sys/module.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/bio.h> 41 #include <sys/buf.h> 42 #include <sys/vnode.h> 43 #include <sys/queue.h> 44 #include <sys/device.h> 45 #include <sys/tree.h> 46 #include <sys/syslink_rpc.h> 47 #include <sys/proc.h> 48 #include <machine/stdarg.h> 49 #include <sys/thread2.h> 50 #include <vfs/devfs/devfs.h> 51 52 /* 53 * system link descriptors identify the command in the 54 * arguments structure. 55 */ 56 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc) 57 58 #define DEVOP_DESC_INIT(name) \ 59 struct syslink_desc DDESCNAME(name) = { \ 60 __offsetof(struct dev_ops, __CONCAT(d_, name)), \ 61 #name } 62 63 DEVOP_DESC_INIT(default); 64 DEVOP_DESC_INIT(open); 65 DEVOP_DESC_INIT(close); 66 DEVOP_DESC_INIT(read); 67 DEVOP_DESC_INIT(write); 68 DEVOP_DESC_INIT(ioctl); 69 DEVOP_DESC_INIT(dump); 70 DEVOP_DESC_INIT(psize); 71 DEVOP_DESC_INIT(poll); 72 DEVOP_DESC_INIT(mmap); 73 DEVOP_DESC_INIT(strategy); 74 DEVOP_DESC_INIT(kqfilter); 75 DEVOP_DESC_INIT(revoke); 76 DEVOP_DESC_INIT(clone); 77 78 /* 79 * Misc default ops 80 */ 81 struct dev_ops dead_dev_ops; 82 83 struct dev_ops default_dev_ops = { 84 { "null" }, 85 .d_default = NULL, /* must be NULL */ 86 .d_open = noopen, 87 .d_close = noclose, 88 .d_read = noread, 89 .d_write = nowrite, 90 .d_ioctl = noioctl, 91 .d_poll = nopoll, 92 .d_mmap = nommap, 93 .d_strategy = nostrategy, 94 .d_dump = nodump, 95 .d_psize = nopsize, 96 .d_kqfilter = nokqfilter, 97 .d_revoke = norevoke, 98 .d_clone = noclone 99 }; 100 101 /************************************************************************ 102 * GENERAL DEVICE API FUNCTIONS * 103 ************************************************************************/ 104 105 int 106 
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred) 107 { 108 struct dev_open_args ap; 109 110 ap.a_head.a_desc = &dev_open_desc; 111 ap.a_head.a_dev = dev; 112 ap.a_oflags = oflags; 113 ap.a_devtype = devtype; 114 ap.a_cred = cred; 115 return(dev->si_ops->d_open(&ap)); 116 } 117 118 int 119 dev_dclose(cdev_t dev, int fflag, int devtype) 120 { 121 struct dev_close_args ap; 122 123 ap.a_head.a_desc = &dev_close_desc; 124 ap.a_head.a_dev = dev; 125 ap.a_fflag = fflag; 126 ap.a_devtype = devtype; 127 return(dev->si_ops->d_close(&ap)); 128 } 129 130 int 131 dev_dread(cdev_t dev, struct uio *uio, int ioflag) 132 { 133 struct dev_read_args ap; 134 int error; 135 136 ap.a_head.a_desc = &dev_read_desc; 137 ap.a_head.a_dev = dev; 138 ap.a_uio = uio; 139 ap.a_ioflag = ioflag; 140 error = dev->si_ops->d_read(&ap); 141 if (error == 0) 142 dev->si_lastread = time_second; 143 return (error); 144 } 145 146 int 147 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag) 148 { 149 struct dev_write_args ap; 150 int error; 151 152 dev->si_lastwrite = time_second; 153 ap.a_head.a_desc = &dev_write_desc; 154 ap.a_head.a_dev = dev; 155 ap.a_uio = uio; 156 ap.a_ioflag = ioflag; 157 error = dev->si_ops->d_write(&ap); 158 return (error); 159 } 160 161 int 162 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred) 163 { 164 struct dev_ioctl_args ap; 165 166 ap.a_head.a_desc = &dev_ioctl_desc; 167 ap.a_head.a_dev = dev; 168 ap.a_cmd = cmd; 169 ap.a_data = data; 170 ap.a_fflag = fflag; 171 ap.a_cred = cred; 172 return(dev->si_ops->d_ioctl(&ap)); 173 } 174 175 int 176 dev_dpoll(cdev_t dev, int events) 177 { 178 struct dev_poll_args ap; 179 int error; 180 181 ap.a_head.a_desc = &dev_poll_desc; 182 ap.a_head.a_dev = dev; 183 ap.a_events = events; 184 error = dev->si_ops->d_poll(&ap); 185 if (error == 0) 186 return(ap.a_events); 187 return (seltrue(dev, events)); 188 } 189 190 int 191 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot) 192 { 193 struct 
dev_mmap_args ap; 194 int error; 195 196 ap.a_head.a_desc = &dev_mmap_desc; 197 ap.a_head.a_dev = dev; 198 ap.a_offset = offset; 199 ap.a_nprot = nprot; 200 error = dev->si_ops->d_mmap(&ap); 201 if (error == 0) 202 return(ap.a_result); 203 return(-1); 204 } 205 206 int 207 dev_dclone(cdev_t dev) 208 { 209 struct dev_clone_args ap; 210 211 ap.a_head.a_desc = &dev_clone_desc; 212 ap.a_head.a_dev = dev; 213 return (dev->si_ops->d_clone(&ap)); 214 } 215 216 int 217 dev_drevoke(cdev_t dev) 218 { 219 struct dev_revoke_args ap; 220 221 ap.a_head.a_desc = &dev_revoke_desc; 222 ap.a_head.a_dev = dev; 223 return (dev->si_ops->d_revoke(&ap)); 224 } 225 226 /* 227 * Core device strategy call, used to issue I/O on a device. There are 228 * two versions, a non-chained version and a chained version. The chained 229 * version reuses a BIO set up by vn_strategy(). The only difference is 230 * that, for now, we do not push a new tracking structure when chaining 231 * from vn_strategy. XXX this will ultimately have to change. 
 */

/*
 * Non-chained strategy entry: takes ownership of bio tracking.  The bio
 * must not already be tracked or completed; a read/write track is chosen
 * from the device based on the buffer command, referenced, and attached
 * to the bio before the driver's d_strategy is invoked.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/* caller must hand us an untracked, not-yet-completed bio */
	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
	    track = &dev->si_track_read;
	else
	    track = &dev->si_track_write;
	/* reference the track before publishing it on the bio */
	bio_track_ref(track);
	bio->bio_track = track;
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	(void)dev->si_ops->d_strategy(&ap);
}

/*
 * Chained strategy entry: the bio was already set up (and tracked) by
 * vn_strategy(), so no new tracking structure is pushed here.
 */
void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/* chained path requires an existing track and an unfinished bio */
	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	(void)dev->si_ops->d_strategy(&ap);
}

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
272 */ 273 int 274 dev_ddump(cdev_t dev) 275 { 276 struct dev_dump_args ap; 277 278 ap.a_head.a_desc = &dev_dump_desc; 279 ap.a_head.a_dev = dev; 280 ap.a_count = 0; 281 ap.a_blkno = 0; 282 ap.a_secsize = 0; 283 return(dev->si_ops->d_dump(&ap)); 284 } 285 286 int64_t 287 dev_dpsize(cdev_t dev) 288 { 289 struct dev_psize_args ap; 290 int error; 291 292 ap.a_head.a_desc = &dev_psize_desc; 293 ap.a_head.a_dev = dev; 294 error = dev->si_ops->d_psize(&ap); 295 if (error == 0) 296 return (ap.a_result); 297 return(-1); 298 } 299 300 int 301 dev_dkqfilter(cdev_t dev, struct knote *kn) 302 { 303 struct dev_kqfilter_args ap; 304 int error; 305 306 ap.a_head.a_desc = &dev_kqfilter_desc; 307 ap.a_head.a_dev = dev; 308 ap.a_kn = kn; 309 error = dev->si_ops->d_kqfilter(&ap); 310 if (error == 0) 311 return(ap.a_result); 312 return(ENODEV); 313 } 314 315 /************************************************************************ 316 * DEVICE HELPER FUNCTIONS * 317 ************************************************************************/ 318 319 /* 320 * MPSAFE 321 */ 322 int 323 dev_drefs(cdev_t dev) 324 { 325 return(dev->si_sysref.refcnt); 326 } 327 328 /* 329 * MPSAFE 330 */ 331 const char * 332 dev_dname(cdev_t dev) 333 { 334 return(dev->si_ops->head.name); 335 } 336 337 /* 338 * MPSAFE 339 */ 340 int 341 dev_dflags(cdev_t dev) 342 { 343 return(dev->si_ops->head.flags); 344 } 345 346 /* 347 * MPSAFE 348 */ 349 int 350 dev_dmaj(cdev_t dev) 351 { 352 return(dev->si_ops->head.maj); 353 } 354 355 /* 356 * Used when forwarding a request through layers. The caller adjusts 357 * ap->a_head.a_dev and then calls this function. 358 */ 359 int 360 dev_doperate(struct dev_generic_args *ap) 361 { 362 int (*func)(struct dev_generic_args *); 363 364 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset); 365 return (func(ap)); 366 } 367 368 /* 369 * Used by the console intercept code only. 
Issue an operation through 370 * a foreign ops structure allowing the ops structure associated 371 * with the device to remain intact. 372 */ 373 int 374 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap) 375 { 376 int (*func)(struct dev_generic_args *); 377 378 func = *(void **)((char *)ops + ap->a_desc->sd_offset); 379 return (func(ap)); 380 } 381 382 /* 383 * Convert a template dev_ops into the real thing by filling in 384 * uninitialized fields. 385 */ 386 void 387 compile_dev_ops(struct dev_ops *ops) 388 { 389 int offset; 390 391 for (offset = offsetof(struct dev_ops, dev_ops_first_field); 392 offset <= offsetof(struct dev_ops, dev_ops_last_field); 393 offset += sizeof(void *) 394 ) { 395 void **func_p = (void **)((char *)ops + offset); 396 void **def_p = (void **)((char *)&default_dev_ops + offset); 397 if (*func_p == NULL) { 398 if (ops->d_default) 399 *func_p = ops->d_default; 400 else 401 *func_p = *def_p; 402 } 403 } 404 } 405 406 /************************************************************************ 407 * MAJOR/MINOR SPACE FUNCTION * 408 ************************************************************************/ 409 410 /* 411 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>). 412 * 413 * Disk devices typically register their major, e.g. 'ad0', and then call 414 * into the disk label management code which overloads its own onto e.g. 'ad0' 415 * to support all the various slice and partition combinations. 416 * 417 * The mask/match supplied in this call are a full 32 bits and the same 418 * mask and match must be specified in a later dev_ops_remove() call to 419 * match this add. However, the match value for the minor number should never 420 * have any bits set in the major number's bit range (8-15). The mask value 421 * may be conveniently specified as -1 without creating any major number 422 * interference. 
 */

/*
 * RB-tree comparator: orders dev_ops_maj nodes by major number.
 */
static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

/* Head of the major-number -> dev_ops_maj red-black tree. */
struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

/*
 * Remove all matching dev_ops entries from the dev_ops_array[] major
 * array so no new user opens can be performed, and destroy all devices
 * installed in the hash table that are associated with this dev_ops.  (see
 * destroy_all_devs()).
 */
int
dev_ops_remove(struct dev_ops *ops, u_int mask, u_int match)
{
	struct dev_ops_maj *rbmaj;
	struct dev_ops_link *link;
	struct dev_ops_link **plink;

	if (ops != &dead_dev_ops)
		destroy_all_devs(ops, mask, match);

	rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, ops->head.maj);
	if (rbmaj == NULL) {
		/* no node for this major at all: nothing was registered */
		kprintf("double-remove of dev_ops %p for %s(%d)\n",
			ops, ops->head.name, ops->head.maj);
		return(0);
	}
	/*
	 * Walk the link list via an indirect pointer so the matching
	 * entry can be unlinked in place below.
	 */
	for (plink = &rbmaj->link; (link = *plink) != NULL;
	     plink = &link->next) {
		if (link->mask == mask && link->match == match) {
			if (link->ops == ops)
				break;
			/* same mask/match but different ops: major hijacked */
			kprintf("%s: ERROR: cannot remove dev_ops, "
			       "its major number %d was stolen by %s\n",
				ops->head.name, ops->head.maj,
				link->ops->head.name
			);
		}
	}
	if (link == NULL) {
		kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
		       "multiple times!\n",
		       ops->head.name, ops->head.maj, mask, match);
	} else {
		/* unlink, drop the registration ref, and free the link */
		*plink = link->next;
		--ops->head.refs; /* XXX ops_release() / record refs */
		kfree(link, M_DEVBUF);
	}

	/*
	 * Scrap the RB tree node for the major number if no ops are
	 * installed any longer.
	 */
	if (rbmaj->link == NULL) {
		dev_ops_rb_tree_RB_REMOVE(&dev_ops_rbhead, rbmaj);
		kfree(rbmaj, M_DEVBUF);
	}

#if 0
	/*
	 * The same ops might be used with multiple devices, so don't
	 * complain if the ref count is non-zero.
	 */
	if (ops->head.refs != 0) {
		kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
			"while %d device refs still exist!\n",
			ops->head.name, ops->head.maj, mask, match,
			ops->head.refs);
	} else {
		if (bootverbose)
			kprintf("%s: ops removed\n", ops->head.name);
	}
#endif
	return 0;
}

/* Destroy every devfs device bound to this ops (any minor). */
int dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

/* Destroy the devfs device bound to this ops with the given minor. */
int dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

/*
 * Release a ops entry.  When the ref count reaches zero, recurse
 * through the stack.
 *
 * NOTE(review): the bare return below makes everything after it dead
 * code, so this function is currently a no-op and head.refs is never
 * decremented here — presumably an intentional disable, but confirm.
 */
void
dev_ops_release(struct dev_ops *ops)
{
	return;
	--ops->head.refs;
	if (ops->head.refs == 0) {
		/* XXX */
	}
}

/*
 * Swap a foreign ops vector onto the device (console intercept),
 * copying maj/data/flags from the old ops.  Returns the previous ops
 * so the caller can restore it later.
 */
struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

/*
 * Undo dev_ops_intercept(): reinstall the original ops and scrub the
 * header fields that were copied into the intercept ops.
 */
void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/


/*
 * Unsupported devswitch functions (e.g.
for writing to read-only device). 569 * XXX may belong elsewhere. 570 */ 571 int 572 norevoke(struct dev_revoke_args *ap) 573 { 574 /* take no action */ 575 return(0); 576 } 577 578 int 579 noclone(struct dev_clone_args *ap) 580 { 581 /* take no action */ 582 return (0); /* allow the clone */ 583 } 584 585 int 586 noopen(struct dev_open_args *ap) 587 { 588 return (ENODEV); 589 } 590 591 int 592 noclose(struct dev_close_args *ap) 593 { 594 return (ENODEV); 595 } 596 597 int 598 noread(struct dev_read_args *ap) 599 { 600 return (ENODEV); 601 } 602 603 int 604 nowrite(struct dev_write_args *ap) 605 { 606 return (ENODEV); 607 } 608 609 int 610 noioctl(struct dev_ioctl_args *ap) 611 { 612 return (ENODEV); 613 } 614 615 int 616 nokqfilter(struct dev_kqfilter_args *ap) 617 { 618 return (ENODEV); 619 } 620 621 int 622 nommap(struct dev_mmap_args *ap) 623 { 624 return (ENODEV); 625 } 626 627 int 628 nopoll(struct dev_poll_args *ap) 629 { 630 ap->a_events = 0; 631 return(0); 632 } 633 634 int 635 nostrategy(struct dev_strategy_args *ap) 636 { 637 struct bio *bio = ap->a_bio; 638 639 bio->bio_buf->b_flags |= B_ERROR; 640 bio->bio_buf->b_error = EOPNOTSUPP; 641 biodone(bio); 642 return(0); 643 } 644 645 int 646 nopsize(struct dev_psize_args *ap) 647 { 648 ap->a_result = 0; 649 return(0); 650 } 651 652 int 653 nodump(struct dev_dump_args *ap) 654 { 655 return (ENODEV); 656 } 657 658 /* 659 * XXX this is probably bogus. Any device that uses it isn't checking the 660 * minor number. 661 */ 662 int 663 nullopen(struct dev_open_args *ap) 664 { 665 return (0); 666 } 667 668 int 669 nullclose(struct dev_close_args *ap) 670 { 671 return (0); 672 } 673 674