1 /* 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * This module allows disk devices to be created and associated with a 36 * communications pipe or socket. You open the device and issue an 37 * ioctl() to install a new disk along with its communications descriptor. 38 * 39 * All further communication occurs via the descriptor using the DMSG 40 * LNK_CONN, LNK_SPAN, and BLOCK protocols. The descriptor can be a 41 * direct connection to a remote machine's disk (in-kernenl), to a remote 42 * cluster controller, to the local cluster controller, etc. 43 * 44 * /dev/xdisk is the control device, issue ioctl()s to create the /dev/xa%d 45 * devices. These devices look like raw disks to the system. 46 */ 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/buf.h> 50 #include <sys/conf.h> 51 #include <sys/device.h> 52 #include <sys/devicestat.h> 53 #include <sys/disk.h> 54 #include <sys/kernel.h> 55 #include <sys/malloc.h> 56 #include <sys/sysctl.h> 57 #include <sys/proc.h> 58 #include <sys/queue.h> 59 #include <sys/udev.h> 60 #include <sys/uuid.h> 61 #include <sys/kern_syscall.h> 62 63 #include <sys/dmsg.h> 64 #include <sys/xdiskioctl.h> 65 66 #include <sys/buf2.h> 67 #include <sys/thread2.h> 68 69 static int xdisk_attach(struct xdisk_attach_ioctl *xaioc); 70 static void xa_exit(kdmsg_iocom_t *iocom); 71 static int xa_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 72 static int xa_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 73 static int xa_lnk_rcvmsg(kdmsg_msg_t *msg); 74 static int xa_lnk_dbgmsg(kdmsg_msg_t *msg); 75 static int xa_adhoc_input(kdmsg_msg_t *msg); 76 77 MALLOC_DEFINE(M_XDISK, "Networked disk client", "Network Disks"); 78 79 /* 80 * Control device, issue ioctls to create xa devices. 81 */ 82 static d_open_t xdisk_open; 83 static d_close_t xdisk_close; 84 static d_ioctl_t xdisk_ioctl; 85 86 static struct dev_ops xdisk_ops = { 87 { "xdisk", 0, D_MPSAFE }, 88 .d_open = xdisk_open, 89 .d_close = xdisk_close, 90 .d_ioctl = xdisk_ioctl 91 }; 92 93 /* 94 * XA disk devices 95 */ 96 static d_open_t xa_open; 97 static d_close_t xa_close; 98 static d_ioctl_t xa_ioctl; 99 static d_strategy_t xa_strategy; 100 static d_psize_t xa_size; 101 102 static struct dev_ops xa_ops = { 103 { "xa", 0, D_DISK | D_CANFREE | D_MPSAFE }, 104 .d_open = xa_open, 105 .d_close = xa_close, 106 .d_ioctl = xa_ioctl, 107 .d_read = physread, 108 .d_write = physwrite, 109 .d_strategy = xa_strategy, 110 .d_psize = xa_size 111 }; 112 113 struct xa_softc { 114 TAILQ_ENTRY(xa_softc) entry; 115 cdev_t dev; 116 kdmsg_iocom_t iocom; 117 struct xdisk_attach_ioctl xaioc; 118 struct disk_info info; 119 struct disk disk; 120 uuid_t pfs_fsid; 121 int unit; 122 int inprog; 123 int connected; 124 }; 125 126 static struct lwkt_token xdisk_token = LWKT_TOKEN_INITIALIZER(xdisk_token); 127 static int xdisk_opencount; 128 static cdev_t xdisk_dev; 129 static TAILQ_HEAD(, xa_softc) xa_queue; 130 131 /* 132 * Module initialization 133 */ 134 static int 135 xdisk_modevent(module_t mod, int type, void *data) 136 { 137 switch (type) { 138 case MOD_LOAD: 139 TAILQ_INIT(&xa_queue); 140 xdisk_dev = make_dev(&xdisk_ops, 0, 141 UID_ROOT, GID_WHEEL, 0600, "xdisk"); 142 break; 143 case MOD_UNLOAD: 144 case MOD_SHUTDOWN: 145 if (xdisk_opencount || TAILQ_FIRST(&xa_queue)) 146 return (EBUSY); 147 if (xdisk_dev) { 148 destroy_dev(xdisk_dev); 149 xdisk_dev = NULL; 150 } 151 dev_ops_remove_all(&xdisk_ops); 152 dev_ops_remove_all(&xa_ops); 153 break; 154 default: 155 break; 156 } 157 return 0; 158 } 159 160 DEV_MODULE(xdisk, xdisk_modevent, 0); 161 162 /* 163 * Control device 164 */ 165 static int 166 xdisk_open(struct dev_open_args *ap) 167 { 168 lwkt_gettoken(&xdisk_token); 169 ++xdisk_opencount; 170 lwkt_reltoken(&xdisk_token); 171 return(0); 172 } 173 174 static int 175 xdisk_close(struct dev_close_args *ap) 176 { 177 lwkt_gettoken(&xdisk_token); 178 --xdisk_opencount; 179 lwkt_reltoken(&xdisk_token); 180 return(0); 181 } 182 183 static int 184 xdisk_ioctl(struct dev_ioctl_args *ap) 185 { 186 int error; 187 188 switch(ap->a_cmd) { 189 case XDISKIOCATTACH: 190 error = xdisk_attach((void *)ap->a_data); 191 break; 192 default: 193 error = ENOTTY; 194 break; 195 } 196 return error; 197 } 198 199 /************************************************************************ 200 * DMSG INTERFACE * 201 ************************************************************************/ 202 203 static int 204 xdisk_attach(struct xdisk_attach_ioctl *xaioc) 205 { 206 struct xa_softc *scan; 207 struct xa_softc *xa; 208 struct file *fp; 209 kdmsg_msg_t *msg; 210 int unit; 211 char devname[64]; 212 cdev_t dev; 213 214 fp = holdfp(curproc->p_fd, xaioc->fd, -1); 215 if (fp == NULL) 216 return EINVAL; 217 218 xa = kmalloc(sizeof(*xa), M_XDISK, M_WAITOK|M_ZERO); 219 220 /* 221 * Find unit 222 */ 223 lwkt_gettoken(&xdisk_token); 224 unit = 0; 225 do { 226 TAILQ_FOREACH(scan, &xa_queue, entry) { 227 if (scan->unit == unit) 228 break; 229 } 230 } while (scan != NULL); 231 xa->unit = unit; 232 xa->xaioc = *xaioc; 233 TAILQ_INSERT_TAIL(&xa_queue, xa, entry); 234 lwkt_reltoken(&xdisk_token); 235 236 /* 237 * Create device 238 */ 239 dev = disk_create(unit, &xa->disk, &xa_ops); 240 dev->si_drv1 = xa; 241 xa->dev = dev; 242 243 xa->info.d_media_blksize = 512; 244 xa->info.d_media_blocks = xaioc->size / 512; 245 xa->info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE; 246 xa->info.d_secpertrack = 32; 247 xa->info.d_nheads = 64; 248 xa->info.d_secpercyl = xa->info.d_secpertrack * xa->info.d_nheads; 249 xa->info.d_ncylinders = 0; 250 disk_setdiskinfo_sync(&xa->disk, &xa->info); 251 252 /* 253 * Set up messaging connection 254 */ 255 ksnprintf(devname, sizeof(devname), "xa%d", unit); 256 kdmsg_iocom_init(&xa->iocom, xa, M_XDISK, 257 xa_lnk_rcvmsg, 258 xa_lnk_dbgmsg, 259 xa_adhoc_input); 260 xa->iocom.exit_func = xa_exit; 261 xa->inprog = 1; 262 kern_uuidgen(&xa->pfs_fsid, 1); 263 kdmsg_iocom_reconnect(&xa->iocom, fp, devname); 264 265 /* 266 * Issue DMSG_LNK_CONN for device. This sets up filters so hopefully 267 * the only SPANs we receive are from servers providing the label 268 * being configured. Hopefully that's just a single server(!)(!). 269 * (HAMMER peers might have multiple servers but block device peers 270 * currently only allow one). There could still be multiple spans 271 * due to there being multiple paths available, however. 272 */ 273 274 msg = kdmsg_msg_alloc(&xa->iocom.router, DMSG_LNK_CONN | DMSGF_CREATE, 275 xa_msg_conn_reply, xa); 276 msg->any.lnk_conn.pfs_type = 0; 277 msg->any.lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 278 msg->any.lnk_conn.peer_type = DMSG_PEER_BLOCK; 279 msg->any.lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK; 280 ksnprintf(msg->any.lnk_conn.cl_label, 281 sizeof(msg->any.lnk_conn.cl_label), 282 "%s", xaioc->cl_label); 283 msg->any.lnk_conn.pfs_fsid = xa->pfs_fsid; 284 xa->iocom.conn_state = msg->state; 285 kdmsg_msg_write(msg); 286 287 xa->inprog = 0; /* unstall msg thread exit (if racing) */ 288 289 return(0); 290 } 291 292 /* 293 * Handle reply to our LNK_CONN transaction (transaction remains open) 294 */ 295 static 296 int 297 xa_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 298 { 299 struct xa_softc *xa = state->any.any; 300 kdmsg_msg_t *rmsg; 301 302 if (msg->any.head.cmd & DMSGF_CREATE) { 303 kprintf("XA LNK_CONN received reply\n"); 304 rmsg = kdmsg_msg_alloc(&xa->iocom.router, 305 DMSG_LNK_SPAN | DMSGF_CREATE, 306 xa_msg_span_reply, xa); 307 rmsg->any.lnk_span.pfs_type = 0; 308 rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1; 309 rmsg->any.lnk_span.peer_type = DMSG_PEER_BLOCK; 310 311 ksnprintf(rmsg->any.lnk_span.cl_label, 312 sizeof(rmsg->any.lnk_span.cl_label), 313 "%s", xa->xaioc.cl_label); 314 kdmsg_msg_write(rmsg); 315 } 316 if ((state->txcmd & DMSGF_DELETE) == 0 && 317 (msg->any.head.cmd & DMSGF_DELETE)) { 318 kprintf("DISK LNK_CONN terminated by remote\n"); 319 xa->iocom.conn_state = NULL; 320 kdmsg_msg_reply(msg, 0); 321 } 322 return(0); 323 } 324 325 static int 326 xa_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 327 { 328 if ((state->txcmd & DMSGF_DELETE) == 0 && 329 (msg->any.head.cmd & DMSGF_DELETE)) { 330 kprintf("SPAN REPLY - Our sent span was terminated by the " 331 "remote %08x state %p\n", msg->any.head.cmd, state); 332 kdmsg_msg_reply(msg, 0); 333 } 334 return (0); 335 } 336 337 /* 338 * Called from iocom core transmit thread upon disconnect. 339 */ 340 static 341 void 342 xa_exit(kdmsg_iocom_t *iocom) 343 { 344 struct xa_softc *xa = iocom->handle; 345 346 kprintf("XA_EXIT UNIT %d\n", xa->unit); 347 348 kdmsg_iocom_uninit(iocom); 349 350 while (xa->inprog) { 351 tsleep(xa, 0, "xarace", hz); 352 } 353 354 /* 355 * XXX allow reconnection, wait for users to terminate? 356 */ 357 358 disk_destroy(&xa->disk); 359 360 lwkt_gettoken(&xdisk_token); 361 TAILQ_REMOVE(&xa_queue, xa, entry); 362 lwkt_reltoken(&xdisk_token); 363 364 kfree(xa, M_XDISK); 365 } 366 367 static int 368 xa_lnk_rcvmsg(kdmsg_msg_t *msg) 369 { 370 switch(msg->any.head.cmd & DMSGF_TRANSMASK) { 371 case DMSG_LNK_CONN | DMSGF_CREATE: 372 /* 373 * connection request from peer, send a streaming 374 * result of 0 (leave the transaction open). Transaction 375 * is left open for the duration of the connection, we 376 * let the kern_dmsg module clean it up on disconnect. 377 */ 378 kdmsg_msg_result(msg, 0); 379 break; 380 case DMSG_LNK_SPAN | DMSGF_CREATE: 381 /* 382 * Incoming SPAN - transaction create 383 * 384 * We do not have to respond right now. Instead we will 385 * respond later on when the peer deletes their side. 386 */ 387 break; 388 case DMSG_LNK_SPAN | DMSGF_DELETE: 389 /* 390 * Incoming SPAN - transaction delete. 391 * 392 * We must terminate our side so both ends can free up 393 * their recorded state. 394 */ 395 /* fall through */ 396 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE: 397 /* 398 * Incoming SPAN - transaction delete (degenerate span). 399 * 400 * We must terminate our side so both ends can free up 401 * their recorded state. 402 */ 403 kdmsg_msg_reply(msg, 0); 404 break; 405 default: 406 /* 407 * Unsupported LNK message received. We only need to 408 * reply if it's a transaction in order to close our end. 409 * Ignore any one-way messages are any further messages 410 * associated with the transaction. 411 * 412 * NOTE: This case also includes DMSG_LNK_ERROR messages 413 * which might be one-way, replying to those would 414 * cause an infinite ping-pong. 415 */ 416 if (msg->any.head.cmd & DMSGF_CREATE) 417 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 418 break; 419 } 420 return(0); 421 } 422 423 static int 424 xa_lnk_dbgmsg(kdmsg_msg_t *msg) 425 { 426 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) { 427 case DMSG_DBG_SHELL: 428 /* 429 * Execute shell command (not supported atm). 430 * 431 * This is a one-way packet but if not (e.g. if part of 432 * a streaming transaction), we will have already closed 433 * our end. 434 */ 435 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 436 break; 437 case DMSG_DBG_SHELL | DMSGF_REPLY: 438 /* 439 * Receive one or more replies to a shell command that we 440 * sent. 441 * 442 * This is a one-way packet but if not (e.g. if part of 443 * a streaming transaction), we will have already closed 444 * our end. 445 */ 446 if (msg->aux_data) { 447 msg->aux_data[msg->aux_size - 1] = 0; 448 kprintf("DEBUGMSG: %s\n", msg->aux_data); 449 } 450 break; 451 default: 452 /* 453 * We don't understand what is going on, issue a reply. 454 * This will take care of all left-over cases whether it 455 * is a transaction or one-way. 456 */ 457 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 458 break; 459 } 460 return(0); 461 } 462 463 static int 464 xa_adhoc_input(kdmsg_msg_t *msg) 465 { 466 kprintf("XA ADHOC INPUT MSG %08x\n", msg->any.head.cmd); 467 return(0); 468 } 469 470 /************************************************************************ 471 * XA DEVICE INTERFACE * 472 ************************************************************************/ 473 474 static int 475 xa_open(struct dev_open_args *ap) 476 { 477 cdev_t dev = ap->a_head.a_dev; 478 struct xa_softc *xa; 479 480 xa = dev->si_drv1; 481 482 dev->si_bsize_phys = 512; 483 dev->si_bsize_best = 32768; 484 485 /* 486 * Issue streaming open and wait for reply. 487 */ 488 489 /* XXX check ap->a_oflags & FWRITE, EACCES if read-only */ 490 491 return(0); 492 } 493 494 static int 495 xa_close(struct dev_close_args *ap) 496 { 497 cdev_t dev = ap->a_head.a_dev; 498 } 499 500 static int 501 xa_strategy(struct dev_strategy_args *ap) 502 { 503 } 504 505 static int 506 xa_ioctl(struct dev_ioctl_args *ap) 507 { 508 return (ENOTTY); 509 } 510 511 static int 512 xa_size(struct dev_psize_args *ap) 513 { 514 struct xa_softc *xa; 515 516 if ((xa = ap->a_head.a_dev->si_drv1) == NULL) 517 return (ENXIO); 518 if (xa->inprog) 519 return (ENXIO); 520 ap->a_result = xa->info.d_media_blocks; 521 return (0); 522 } 523