1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert 4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer, 5 * All rights reserved. 6 * Copyright (c) 1982, 1986, 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 * 30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $ 31 */ 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/sysctl.h> 36 #include <sys/systm.h> 37 #include <sys/module.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/bio.h> 41 #include <sys/buf.h> 42 #include <sys/vnode.h> 43 #include <sys/queue.h> 44 #include <sys/device.h> 45 #include <sys/tree.h> 46 #include <sys/syslink_rpc.h> 47 #include <sys/proc.h> 48 #include <machine/stdarg.h> 49 #include <sys/thread2.h> 50 #include <sys/devfs.h> 51 #include <sys/dsched.h> 52 53 /* 54 * system link descriptors identify the command in the 55 * arguments structure. 56 */ 57 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc) 58 59 #define DEVOP_DESC_INIT(name) \ 60 struct syslink_desc DDESCNAME(name) = { \ 61 __offsetof(struct dev_ops, __CONCAT(d_, name)), \ 62 #name } 63 64 DEVOP_DESC_INIT(default); 65 DEVOP_DESC_INIT(open); 66 DEVOP_DESC_INIT(close); 67 DEVOP_DESC_INIT(read); 68 DEVOP_DESC_INIT(write); 69 DEVOP_DESC_INIT(ioctl); 70 DEVOP_DESC_INIT(dump); 71 DEVOP_DESC_INIT(psize); 72 DEVOP_DESC_INIT(poll); 73 DEVOP_DESC_INIT(mmap); 74 DEVOP_DESC_INIT(strategy); 75 DEVOP_DESC_INIT(kqfilter); 76 DEVOP_DESC_INIT(revoke); 77 DEVOP_DESC_INIT(clone); 78 79 /* 80 * Misc default ops 81 */ 82 struct dev_ops dead_dev_ops; 83 84 struct dev_ops default_dev_ops = { 85 { "null" }, 86 .d_default = NULL, /* must be NULL */ 87 .d_open = noopen, 88 .d_close = noclose, 89 .d_read = noread, 90 .d_write = nowrite, 91 .d_ioctl = noioctl, 92 .d_poll = nopoll, 93 .d_mmap = nommap, 94 .d_strategy = nostrategy, 95 .d_dump = nodump, 96 .d_psize = nopsize, 97 .d_kqfilter = nokqfilter, 98 .d_revoke = norevoke, 99 .d_clone = noclone 100 }; 101 102 /************************************************************************ 103 * GENERAL DEVICE API FUNCTIONS * 104 ************************************************************************/ 105 106 
int 107 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred) 108 { 109 struct dev_open_args ap; 110 111 ap.a_head.a_desc = &dev_open_desc; 112 ap.a_head.a_dev = dev; 113 ap.a_oflags = oflags; 114 ap.a_devtype = devtype; 115 ap.a_cred = cred; 116 return(dev->si_ops->d_open(&ap)); 117 } 118 119 int 120 dev_dclose(cdev_t dev, int fflag, int devtype) 121 { 122 struct dev_close_args ap; 123 124 ap.a_head.a_desc = &dev_close_desc; 125 ap.a_head.a_dev = dev; 126 ap.a_fflag = fflag; 127 ap.a_devtype = devtype; 128 return(dev->si_ops->d_close(&ap)); 129 } 130 131 int 132 dev_dread(cdev_t dev, struct uio *uio, int ioflag) 133 { 134 struct dev_read_args ap; 135 int error; 136 137 ap.a_head.a_desc = &dev_read_desc; 138 ap.a_head.a_dev = dev; 139 ap.a_uio = uio; 140 ap.a_ioflag = ioflag; 141 error = dev->si_ops->d_read(&ap); 142 if (error == 0) 143 dev->si_lastread = time_second; 144 return (error); 145 } 146 147 int 148 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag) 149 { 150 struct dev_write_args ap; 151 int error; 152 153 dev->si_lastwrite = time_second; 154 ap.a_head.a_desc = &dev_write_desc; 155 ap.a_head.a_dev = dev; 156 ap.a_uio = uio; 157 ap.a_ioflag = ioflag; 158 error = dev->si_ops->d_write(&ap); 159 return (error); 160 } 161 162 int 163 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred, 164 struct sysmsg *msg) 165 { 166 struct dev_ioctl_args ap; 167 168 ap.a_head.a_desc = &dev_ioctl_desc; 169 ap.a_head.a_dev = dev; 170 ap.a_cmd = cmd; 171 ap.a_data = data; 172 ap.a_fflag = fflag; 173 ap.a_cred = cred; 174 ap.a_sysmsg = msg; 175 return(dev->si_ops->d_ioctl(&ap)); 176 } 177 178 int 179 dev_dpoll(cdev_t dev, int events) 180 { 181 struct dev_poll_args ap; 182 int error; 183 184 ap.a_head.a_desc = &dev_poll_desc; 185 ap.a_head.a_dev = dev; 186 ap.a_events = events; 187 error = dev->si_ops->d_poll(&ap); 188 if (error == 0) 189 return(ap.a_events); 190 return (seltrue(dev, events)); 191 } 192 193 int 194 dev_dmmap(cdev_t dev, 
vm_offset_t offset, int nprot) 195 { 196 struct dev_mmap_args ap; 197 int error; 198 199 ap.a_head.a_desc = &dev_mmap_desc; 200 ap.a_head.a_dev = dev; 201 ap.a_offset = offset; 202 ap.a_nprot = nprot; 203 error = dev->si_ops->d_mmap(&ap); 204 if (error == 0) 205 return(ap.a_result); 206 return(-1); 207 } 208 209 int 210 dev_dclone(cdev_t dev) 211 { 212 struct dev_clone_args ap; 213 214 ap.a_head.a_desc = &dev_clone_desc; 215 ap.a_head.a_dev = dev; 216 return (dev->si_ops->d_clone(&ap)); 217 } 218 219 int 220 dev_drevoke(cdev_t dev) 221 { 222 struct dev_revoke_args ap; 223 224 ap.a_head.a_desc = &dev_revoke_desc; 225 ap.a_head.a_dev = dev; 226 return (dev->si_ops->d_revoke(&ap)); 227 } 228 229 /* 230 * Core device strategy call, used to issue I/O on a device. There are 231 * two versions, a non-chained version and a chained version. The chained 232 * version reuses a BIO set up by vn_strategy(). The only difference is 233 * that, for now, we do not push a new tracking structure when chaining 234 * from vn_strategy. XXX this will ultimately have to change. 
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/* The non-chained entry expects a fresh BIO with no tracker yet. */
	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	/*
	 * Attach the per-device I/O tracker: reads use si_track_read, all
	 * other commands use si_track_write.  The tracker must be
	 * referenced before the BIO points at it.
	 */
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;

	/* Give the I/O scheduler a chance to tag the buffer if it has not. */
	if (dsched_is_clear_buf_priv(bio->bio_buf))
		dsched_new_buf(bio->bio_buf);

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	(void)dev->si_ops->d_strategy(&ap);
}

/*
 * Chained strategy: the BIO was already set up (and tracked) by
 * vn_strategy(), so no new tracking structure is pushed here.
 */
void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	/* The chained entry requires an existing tracker. */
	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	(void)dev->si_ops->d_strategy(&ap);
}

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
279 */ 280 int 281 dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset, 282 size_t length) 283 { 284 struct dev_dump_args ap; 285 286 ap.a_head.a_desc = &dev_dump_desc; 287 ap.a_head.a_dev = dev; 288 ap.a_count = 0; 289 ap.a_blkno = 0; 290 ap.a_secsize = 0; 291 ap.a_virtual = virtual; 292 ap.a_physical = physical; 293 ap.a_offset = offset; 294 ap.a_length = length; 295 return(dev->si_ops->d_dump(&ap)); 296 } 297 298 int64_t 299 dev_dpsize(cdev_t dev) 300 { 301 struct dev_psize_args ap; 302 int error; 303 304 ap.a_head.a_desc = &dev_psize_desc; 305 ap.a_head.a_dev = dev; 306 error = dev->si_ops->d_psize(&ap); 307 if (error == 0) 308 return (ap.a_result); 309 return(-1); 310 } 311 312 int 313 dev_dkqfilter(cdev_t dev, struct knote *kn) 314 { 315 struct dev_kqfilter_args ap; 316 int error; 317 318 ap.a_head.a_desc = &dev_kqfilter_desc; 319 ap.a_head.a_dev = dev; 320 ap.a_kn = kn; 321 error = dev->si_ops->d_kqfilter(&ap); 322 if (error == 0) 323 return(ap.a_result); 324 return(ENODEV); 325 } 326 327 /************************************************************************ 328 * DEVICE HELPER FUNCTIONS * 329 ************************************************************************/ 330 331 /* 332 * MPSAFE 333 */ 334 int 335 dev_drefs(cdev_t dev) 336 { 337 return(dev->si_sysref.refcnt); 338 } 339 340 /* 341 * MPSAFE 342 */ 343 const char * 344 dev_dname(cdev_t dev) 345 { 346 return(dev->si_ops->head.name); 347 } 348 349 /* 350 * MPSAFE 351 */ 352 int 353 dev_dflags(cdev_t dev) 354 { 355 return(dev->si_ops->head.flags); 356 } 357 358 /* 359 * MPSAFE 360 */ 361 int 362 dev_dmaj(cdev_t dev) 363 { 364 return(dev->si_ops->head.maj); 365 } 366 367 /* 368 * Used when forwarding a request through layers. The caller adjusts 369 * ap->a_head.a_dev and then calls this function. 
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);

	/*
	 * Locate the method slot in the target device's ops vector using
	 * the byte offset recorded in the syslink descriptor, then call
	 * through it with the same argument block.
	 */
	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
	return (func(ap));
}

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);

	/* Same slot lookup as dev_doperate(), but against the supplied ops. */
	func = *(void **)((char *)ops + ap->a_desc->sd_offset);
	return (func(ap));
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 *
 * Walks every function-pointer slot between dev_ops_first_field and
 * dev_ops_last_field; NULL slots are filled from ops->d_default if the
 * template provides one, otherwise from default_dev_ops.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}

/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.
However, the match value for the minor number should never 432 * have any bits set in the major number's bit range (8-15). The mask value 433 * may be conveniently specified as -1 without creating any major number 434 * interference. 435 */ 436 437 static 438 int 439 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b) 440 { 441 if (a->maj < b->maj) 442 return(-1); 443 else if (a->maj > b->maj) 444 return(1); 445 return(0); 446 } 447 448 RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj); 449 450 struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead); 451 452 int 453 dev_ops_remove_all(struct dev_ops *ops) 454 { 455 return devfs_destroy_dev_by_ops(ops, -1); 456 } 457 458 int 459 dev_ops_remove_minor(struct dev_ops *ops, int minor) 460 { 461 return devfs_destroy_dev_by_ops(ops, minor); 462 } 463 464 struct dev_ops * 465 dev_ops_intercept(cdev_t dev, struct dev_ops *iops) 466 { 467 struct dev_ops *oops = dev->si_ops; 468 469 compile_dev_ops(iops); 470 iops->head.maj = oops->head.maj; 471 iops->head.data = oops->head.data; 472 iops->head.flags = oops->head.flags; 473 dev->si_ops = iops; 474 dev->si_flags |= SI_INTERCEPTED; 475 476 return (oops); 477 } 478 479 void 480 dev_ops_restore(cdev_t dev, struct dev_ops *oops) 481 { 482 struct dev_ops *iops = dev->si_ops; 483 484 dev->si_ops = oops; 485 dev->si_flags &= ~SI_INTERCEPTED; 486 iops->head.maj = 0; 487 iops->head.data = NULL; 488 iops->head.flags = 0; 489 } 490 491 /************************************************************************ 492 * DEFAULT DEV OPS FUNCTIONS * 493 ************************************************************************/ 494 495 496 /* 497 * Unsupported devswitch functions (e.g. for writing to read-only device). 498 * XXX may belong elsewhere. 
499 */ 500 int 501 norevoke(struct dev_revoke_args *ap) 502 { 503 /* take no action */ 504 return(0); 505 } 506 507 int 508 noclone(struct dev_clone_args *ap) 509 { 510 /* take no action */ 511 return (0); /* allow the clone */ 512 } 513 514 int 515 noopen(struct dev_open_args *ap) 516 { 517 return (ENODEV); 518 } 519 520 int 521 noclose(struct dev_close_args *ap) 522 { 523 return (ENODEV); 524 } 525 526 int 527 noread(struct dev_read_args *ap) 528 { 529 return (ENODEV); 530 } 531 532 int 533 nowrite(struct dev_write_args *ap) 534 { 535 return (ENODEV); 536 } 537 538 int 539 noioctl(struct dev_ioctl_args *ap) 540 { 541 return (ENODEV); 542 } 543 544 int 545 nokqfilter(struct dev_kqfilter_args *ap) 546 { 547 return (ENODEV); 548 } 549 550 int 551 nommap(struct dev_mmap_args *ap) 552 { 553 return (ENODEV); 554 } 555 556 int 557 nopoll(struct dev_poll_args *ap) 558 { 559 ap->a_events = 0; 560 return(0); 561 } 562 563 int 564 nostrategy(struct dev_strategy_args *ap) 565 { 566 struct bio *bio = ap->a_bio; 567 568 bio->bio_buf->b_flags |= B_ERROR; 569 bio->bio_buf->b_error = EOPNOTSUPP; 570 biodone(bio); 571 return(0); 572 } 573 574 int 575 nopsize(struct dev_psize_args *ap) 576 { 577 ap->a_result = 0; 578 return(0); 579 } 580 581 int 582 nodump(struct dev_dump_args *ap) 583 { 584 return (ENODEV); 585 } 586 587 /* 588 * XXX this is probably bogus. Any device that uses it isn't checking the 589 * minor number. 590 */ 591 int 592 nullopen(struct dev_open_args *ap) 593 { 594 return (0); 595 } 596 597 int 598 nullclose(struct dev_close_args *ap) 599 { 600 return (0); 601 } 602 603