1 /* 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/proc.h> 38 #include <sys/sysctl.h> 39 #include <sys/buf.h> 40 #include <sys/conf.h> 41 #include <sys/disklabel.h> 42 #include <sys/disklabel32.h> 43 #include <sys/disklabel64.h> 44 #include <sys/diskslice.h> 45 #include <sys/diskmbr.h> 46 #include <sys/disk.h> 47 #include <sys/malloc.h> 48 #include <sys/device.h> 49 #include <sys/devfs.h> 50 #include <sys/thread.h> 51 #include <sys/queue.h> 52 #include <sys/lock.h> 53 #include <sys/stat.h> 54 #include <sys/uuid.h> 55 #include <sys/dmsg.h> 56 57 #include <sys/buf2.h> 58 #include <sys/mplock2.h> 59 #include <sys/msgport2.h> 60 #include <sys/thread2.h> 61 62 struct dios_open { 63 int openrd; 64 int openwr; 65 }; 66 67 struct dios_io { 68 int count; 69 int eof; 70 }; 71 72 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg"); 73 74 static int disk_iocom_reconnect(struct disk *dp, struct file *fp); 75 static int disk_rcvdmsg(kdmsg_msg_t *msg); 76 77 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg); 78 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg); 79 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg); 80 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg); 81 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg); 82 static void diskiodone(struct bio *bio); 83 84 void 85 disk_iocom_init(struct disk *dp) 86 { 87 kdmsg_iocom_init(&dp->d_iocom, dp, 88 KDMSG_IOCOMF_AUTOCONN | 89 KDMSG_IOCOMF_AUTORXSPAN | 90 KDMSG_IOCOMF_AUTOTXSPAN, 91 M_DMSG_DISK, disk_rcvdmsg); 92 } 93 94 void 95 disk_iocom_update(struct disk *dp) 96 { 97 } 98 99 void 100 disk_iocom_uninit(struct disk *dp) 101 { 102 kdmsg_iocom_uninit(&dp->d_iocom); 103 } 104 105 int 106 disk_iocom_ioctl(struct disk *dp, int cmd, void *data) 107 { 108 struct file *fp; 109 struct disk_ioc_recluster *recl; 110 int error; 111 112 switch(cmd) { 113 case DIOCRECLUSTER: 114 recl = data; 115 fp = holdfp(curproc->p_fd, recl->fd, -1); 116 if (fp) { 117 error = disk_iocom_reconnect(dp, fp); 118 } else { 119 error = EINVAL; 120 } 121 break; 122 default: 123 error = EOPNOTSUPP; 124 break; 125 } 126 return error; 127 } 128 129 static 130 int 131 disk_iocom_reconnect(struct disk *dp, struct file *fp) 132 { 133 char devname[64]; 134 135 ksnprintf(devname, sizeof(devname), "%s%d", 136 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev)); 137 138 kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname); 139 140 dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER; 141 dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 142 dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK; 143 dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK; 144 dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1; 145 ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label, 146 sizeof(dp->d_iocom.auto_lnk_conn.cl_label), 147 "%s/%s", hostname, devname); 148 if (dp->d_info.d_serialno) { 149 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label, 150 sizeof(dp->d_iocom.auto_lnk_conn.fs_label), 151 "%s", dp->d_info.d_serialno); 152 } 153 154 dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER; 155 dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1; 156 dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK; 157 dp->d_iocom.auto_lnk_span.media.block.bytes = 158 dp->d_info.d_media_size; 159 dp->d_iocom.auto_lnk_span.media.block.blksize = 160 dp->d_info.d_media_blksize; 161 ksnprintf(dp->d_iocom.auto_lnk_span.cl_label, 162 sizeof(dp->d_iocom.auto_lnk_span.cl_label), 163 "%s/%s", hostname, devname); 164 if (dp->d_info.d_serialno) { 165 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label, 166 sizeof(dp->d_iocom.auto_lnk_span.fs_label), 167 "%s", dp->d_info.d_serialno); 168 } 169 170 kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL); 171 172 return (0); 173 } 174 175 int 176 disk_rcvdmsg(kdmsg_msg_t *msg) 177 { 178 struct disk *dp = msg->state->iocom->handle; 179 180 /* 181 * Handle debug messages (these might not be in transactions) 182 */ 183 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) { 184 case DMSG_DBG_SHELL: 185 /* 186 * Execute shell command (not supported atm) 187 */ 188 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 189 return(0); 190 case DMSG_DBG_SHELL | DMSGF_REPLY: 191 if (msg->aux_data) { 192 msg->aux_data[msg->aux_size - 1] = 0; 193 kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data); 194 } 195 return(0); 196 } 197 198 /* 199 * All remaining messages must be in a transaction 200 * 201 * NOTE! We are switching on the first message's command. The 202 * actual message command within the transaction may be 203 * different (if streaming within a transaction). 204 */ 205 if (msg->state == &msg->state->iocom->state0) { 206 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 207 return(0); 208 } 209 210 switch(msg->state->rxcmd & DMSGF_CMDSWMASK) { 211 case DMSG_BLK_OPEN: 212 case DMSG_BLK_CLOSE: 213 disk_blk_open(dp, msg); 214 break; 215 case DMSG_BLK_READ: 216 disk_blk_read(dp, msg); 217 break; 218 case DMSG_BLK_WRITE: 219 disk_blk_write(dp, msg); 220 break; 221 case DMSG_BLK_FLUSH: 222 disk_blk_flush(dp, msg); 223 break; 224 case DMSG_BLK_FREEBLKS: 225 disk_blk_freeblks(dp, msg); 226 break; 227 default: 228 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) { 229 if (msg->any.head.cmd & DMSGF_DELETE) 230 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 231 else 232 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP); 233 } 234 break; 235 } 236 return (0); 237 } 238 239 static 240 void 241 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg) 242 { 243 struct dios_open *openst; 244 int error = DMSG_ERR_NOSUPP; 245 int fflags; 246 247 openst = msg->state->any.any; 248 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) { 249 if (openst == NULL) { 250 openst = kmalloc(sizeof(*openst), M_DEVBUF, 251 M_WAITOK | M_ZERO); 252 msg->state->any.any = openst; 253 } 254 fflags = 0; 255 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 256 fflags = FREAD; 257 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 258 fflags |= FWRITE; 259 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL); 260 if (error) { 261 error = DMSG_ERR_IO; 262 } else { 263 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 264 ++openst->openrd; 265 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 266 ++openst->openwr; 267 } 268 } 269 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE && 270 openst) { 271 fflags = 0; 272 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) && 273 openst->openrd) { 274 fflags = FREAD; 275 } 276 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) && 277 openst->openwr) { 278 fflags |= FWRITE; 279 } 280 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL); 281 if (error) { 282 error = DMSG_ERR_IO; 283 } else { 284 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD) 285 --openst->openrd; 286 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR) 287 --openst->openwr; 288 } 289 } 290 if (msg->any.head.cmd & DMSGF_DELETE) { 291 if (openst) { 292 while (openst->openrd && openst->openwr) { 293 --openst->openrd; 294 --openst->openwr; 295 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL); 296 } 297 while (openst->openrd) { 298 --openst->openrd; 299 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL); 300 } 301 while (openst->openwr) { 302 --openst->openwr; 303 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL); 304 } 305 kfree(openst, M_DEVBUF); 306 msg->state->any.any = NULL; 307 } 308 kdmsg_msg_reply(msg, error); 309 } else { 310 kdmsg_msg_result(msg, error); 311 } 312 } 313 314 static 315 void 316 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg) 317 { 318 struct dios_io *iost; 319 struct buf *bp; 320 struct bio *bio; 321 int error = DMSG_ERR_NOSUPP; 322 int reterr = 1; 323 324 /* 325 * Only DMSG_BLK_READ commands imply read ops. 326 */ 327 iost = msg->state->any.any; 328 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) { 329 if (msg->any.blk_read.bytes < DEV_BSIZE || 330 msg->any.blk_read.bytes > MAXPHYS) { 331 error = DMSG_ERR_PARAM; 332 goto done; 333 } 334 if (iost == NULL) { 335 iost = kmalloc(sizeof(*iost), M_DEVBUF, 336 M_WAITOK | M_ZERO); 337 msg->state->any.any = iost; 338 } 339 reterr = 0; 340 bp = geteblk(msg->any.blk_read.bytes); 341 bio = &bp->b_bio1; 342 bp->b_cmd = BUF_CMD_READ; 343 bp->b_bcount = msg->any.blk_read.bytes; 344 bp->b_resid = bp->b_bcount; 345 bio->bio_offset = msg->any.blk_read.offset; 346 bio->bio_caller_info1.ptr = msg->state; 347 bio->bio_done = diskiodone; 348 /* kdmsg_state_hold(msg->state); */ 349 350 atomic_add_int(&iost->count, 1); 351 if (msg->any.head.cmd & DMSGF_DELETE) 352 iost->eof = 1; 353 BUF_KERNPROC(bp); 354 dev_dstrategy(dp->d_rawdev, bio); 355 } 356 done: 357 if (reterr) { 358 if (msg->any.head.cmd & DMSGF_DELETE) { 359 if (iost && iost->count == 0) { 360 kfree(iost, M_DEVBUF); 361 msg->state->any.any = NULL; 362 } 363 kdmsg_msg_reply(msg, error); 364 } else { 365 kdmsg_msg_result(msg, error); 366 } 367 } 368 } 369 370 static 371 void 372 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg) 373 { 374 struct dios_io *iost; 375 struct buf *bp; 376 struct bio *bio; 377 int error = DMSG_ERR_NOSUPP; 378 int reterr = 1; 379 380 /* 381 * Only DMSG_BLK_WRITE commands imply read ops. 382 */ 383 iost = msg->state->any.any; 384 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) { 385 if (msg->any.blk_write.bytes < DEV_BSIZE || 386 msg->any.blk_write.bytes > MAXPHYS) { 387 error = DMSG_ERR_PARAM; 388 goto done; 389 } 390 if (iost == NULL) { 391 iost = kmalloc(sizeof(*iost), M_DEVBUF, 392 M_WAITOK | M_ZERO); 393 msg->state->any.any = iost; 394 } 395 396 /* 397 * Issue WRITE. Short data implies zeros. Try to optimize 398 * the buffer cache buffer for the case where we can just 399 * use the message's data pointer. 400 */ 401 reterr = 0; 402 if (msg->aux_size >= msg->any.blk_write.bytes) 403 bp = getpbuf(NULL); 404 else 405 bp = geteblk(msg->any.blk_write.bytes); 406 bio = &bp->b_bio1; 407 bp->b_cmd = BUF_CMD_WRITE; 408 bp->b_bcount = msg->any.blk_write.bytes; 409 bp->b_resid = bp->b_bcount; 410 if (msg->aux_size >= msg->any.blk_write.bytes) { 411 bp->b_data = msg->aux_data; 412 } else { 413 bcopy(msg->aux_data, bp->b_data, msg->aux_size); 414 bzero(bp->b_data + msg->aux_size, 415 msg->any.blk_write.bytes - msg->aux_size); 416 } 417 bio->bio_offset = msg->any.blk_write.offset; 418 bio->bio_caller_info1.ptr = msg->state; 419 bio->bio_done = diskiodone; 420 /* kdmsg_state_hold(msg->state); */ 421 422 atomic_add_int(&iost->count, 1); 423 if (msg->any.head.cmd & DMSGF_DELETE) 424 iost->eof = 1; 425 BUF_KERNPROC(bp); 426 dev_dstrategy(dp->d_rawdev, bio); 427 } 428 done: 429 if (reterr) { 430 if (msg->any.head.cmd & DMSGF_DELETE) { 431 if (iost && iost->count == 0) { 432 kfree(iost, M_DEVBUF); 433 msg->state->any.any = NULL; 434 } 435 kdmsg_msg_reply(msg, error); 436 } else { 437 kdmsg_msg_result(msg, error); 438 } 439 } 440 } 441 442 static 443 void 444 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg) 445 { 446 struct dios_io *iost; 447 struct buf *bp; 448 struct bio *bio; 449 int error = DMSG_ERR_NOSUPP; 450 int reterr = 1; 451 452 /* 453 * Only DMSG_BLK_FLUSH commands imply read ops. 454 */ 455 iost = msg->state->any.any; 456 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) { 457 if (iost == NULL) { 458 iost = kmalloc(sizeof(*iost), M_DEVBUF, 459 M_WAITOK | M_ZERO); 460 msg->state->any.any = iost; 461 } 462 reterr = 0; 463 bp = getpbuf(NULL); 464 bio = &bp->b_bio1; 465 bp->b_cmd = BUF_CMD_FLUSH; 466 bp->b_bcount = msg->any.blk_flush.bytes; 467 bp->b_resid = 0; 468 bio->bio_offset = msg->any.blk_flush.offset; 469 bio->bio_caller_info1.ptr = msg->state; 470 bio->bio_done = diskiodone; 471 /* kdmsg_state_hold(msg->state); */ 472 473 atomic_add_int(&iost->count, 1); 474 if (msg->any.head.cmd & DMSGF_DELETE) 475 iost->eof = 1; 476 BUF_KERNPROC(bp); 477 dev_dstrategy(dp->d_rawdev, bio); 478 } 479 if (reterr) { 480 if (msg->any.head.cmd & DMSGF_DELETE) { 481 if (iost && iost->count == 0) { 482 kfree(iost, M_DEVBUF); 483 msg->state->any.any = NULL; 484 } 485 kdmsg_msg_reply(msg, error); 486 } else { 487 kdmsg_msg_result(msg, error); 488 } 489 } 490 } 491 492 static 493 void 494 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg) 495 { 496 struct dios_io *iost; 497 struct buf *bp; 498 struct bio *bio; 499 int error = DMSG_ERR_NOSUPP; 500 int reterr = 1; 501 502 /* 503 * Only DMSG_BLK_FREEBLKS commands imply read ops. 504 */ 505 iost = msg->state->any.any; 506 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) { 507 if (iost == NULL) { 508 iost = kmalloc(sizeof(*iost), M_DEVBUF, 509 M_WAITOK | M_ZERO); 510 msg->state->any.any = iost; 511 } 512 reterr = 0; 513 bp = getpbuf(NULL); 514 bio = &bp->b_bio1; 515 bp->b_cmd = BUF_CMD_FREEBLKS; 516 bp->b_bcount = msg->any.blk_freeblks.bytes; 517 bp->b_resid = 0; 518 bio->bio_offset = msg->any.blk_freeblks.offset; 519 bio->bio_caller_info1.ptr = msg->state; 520 bio->bio_done = diskiodone; 521 /* kdmsg_state_hold(msg->state); */ 522 523 atomic_add_int(&iost->count, 1); 524 if (msg->any.head.cmd & DMSGF_DELETE) 525 iost->eof = 1; 526 BUF_KERNPROC(bp); 527 dev_dstrategy(dp->d_rawdev, bio); 528 } 529 if (reterr) { 530 if (msg->any.head.cmd & DMSGF_DELETE) { 531 if (iost && iost->count == 0) { 532 kfree(iost, M_DEVBUF); 533 msg->state->any.any = NULL; 534 } 535 kdmsg_msg_reply(msg, error); 536 } else { 537 kdmsg_msg_result(msg, error); 538 } 539 } 540 } 541 542 static 543 void 544 diskiodone(struct bio *bio) 545 { 546 struct buf *bp = bio->bio_buf; 547 kdmsg_state_t *state = bio->bio_caller_info1.ptr; 548 kdmsg_msg_t *rmsg; 549 struct dios_io *iost = state->any.any; 550 int error; 551 int resid = 0; 552 int bytes; 553 uint32_t cmd; 554 void *data; 555 556 cmd = DMSG_LNK_ERROR; 557 data = NULL; 558 bytes = 0; 559 560 switch(bp->b_cmd) { 561 case BUF_CMD_READ: 562 cmd = DMSG_LNK_ERROR; 563 data = bp->b_data; 564 bytes = bp->b_bcount; 565 /* fall through */ 566 case BUF_CMD_WRITE: 567 if (bp->b_flags & B_ERROR) { 568 error = bp->b_error; 569 } else { 570 error = 0; 571 resid = bp->b_resid; 572 } 573 break; 574 case BUF_CMD_FLUSH: 575 case BUF_CMD_FREEBLKS: 576 if (bp->b_flags & B_ERROR) 577 error = bp->b_error; 578 else 579 error = 0; 580 break; 581 default: 582 panic("diskiodone: Unknown bio cmd = %d\n", 583 bio->bio_buf->b_cmd); 584 error = 0; /* avoid compiler warning */ 585 break; /* NOT REACHED */ 586 } 587 588 /* 589 * Convert error to DMSG_ERR_* code. 590 */ 591 if (error) 592 error = DMSG_ERR_IO; 593 594 /* 595 * Convert LNK_ERROR or BLK_ERROR if non-zero resid. READS will 596 * have already converted cmd to BLK_ERROR and set up data to return. 597 */ 598 if (resid && cmd == DMSG_LNK_ERROR) 599 cmd = DMSG_BLK_ERROR; 600 /* XXX txcmd is delayed so this won't work for streaming */ 601 if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */ 602 cmd |= DMSGF_CREATE; 603 if (iost->eof) { 604 if (atomic_fetchadd_int(&iost->count, -1) == 1) 605 cmd |= DMSGF_DELETE; 606 } else { 607 atomic_add_int(&iost->count, -1); 608 } 609 cmd |= DMSGF_REPLY; 610 611 /* 612 * Allocate a basic or extended reply. Be careful not to populate 613 * extended header fields unless we allocated an extended reply. 614 */ 615 rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0); 616 if (data) { 617 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT); 618 rmsg->aux_size = bytes; 619 rmsg->flags |= KDMSG_FLAG_AUXALLOC; 620 bcopy(data, rmsg->aux_data, bytes); 621 } 622 rmsg->any.blk_error.head.error = error; 623 if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR) 624 rmsg->any.blk_error.resid = resid; 625 bio->bio_caller_info1.ptr = NULL; 626 /* kdmsg_state_drop(state); */ 627 kdmsg_msg_write(rmsg); 628 if (bp->b_flags & B_PAGING) { 629 relpbuf(bio->bio_buf, NULL); 630 } else { 631 bp->b_flags |= B_INVAL | B_AGE; 632 brelse(bp); 633 } 634 } 635