1 /*- 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * TODO: txcmd CREATE state is deferred by txmsgq, need to calculate 36 * a streaming response. See subr_diskiocom()'s diskiodone(). 
37 */ 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/kernel.h> 41 #include <sys/conf.h> 42 #include <sys/systm.h> 43 #include <sys/queue.h> 44 #include <sys/tree.h> 45 #include <sys/malloc.h> 46 #include <sys/mount.h> 47 #include <sys/socket.h> 48 #include <sys/vnode.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/priv.h> 52 #include <sys/thread.h> 53 #include <sys/globaldata.h> 54 #include <sys/limits.h> 55 56 #include <sys/dmsg.h> 57 58 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp); 59 60 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg); 61 static int kdmsg_state_msgrx(kdmsg_msg_t *msg); 62 static int kdmsg_state_msgtx(kdmsg_msg_t *msg); 63 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg); 64 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg); 65 static void kdmsg_state_abort(kdmsg_state_t *state); 66 static void kdmsg_state_free(kdmsg_state_t *state); 67 68 static void kdmsg_iocom_thread_rd(void *arg); 69 static void kdmsg_iocom_thread_wr(void *arg); 70 static int kdmsg_autorxmsg(kdmsg_msg_t *msg); 71 72 /*static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);*/ 73 74 /* 75 * Initialize the roll-up communications structure for a network 76 * messaging session. This function does not install the socket. 77 */ 78 void 79 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags, 80 struct malloc_type *mmsg, 81 int (*rcvmsg)(kdmsg_msg_t *msg)) 82 { 83 bzero(iocom, sizeof(*iocom)); 84 iocom->handle = handle; 85 iocom->mmsg = mmsg; 86 iocom->rcvmsg = rcvmsg; 87 iocom->flags = flags; 88 lockinit(&iocom->msglk, "h2msg", 0, 0); 89 TAILQ_INIT(&iocom->msgq); 90 RB_INIT(&iocom->staterd_tree); 91 RB_INIT(&iocom->statewr_tree); 92 93 iocom->state0.iocom = iocom; 94 iocom->state0.parent = &iocom->state0; 95 TAILQ_INIT(&iocom->state0.subq); 96 } 97 98 /* 99 * [Re]connect using the passed file pointer. The caller must ref the 100 * fp for us. We own that ref now. 
101 */ 102 void 103 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, 104 const char *subsysname) 105 { 106 /* 107 * Destroy the current connection 108 */ 109 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 110 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL); 111 while (iocom->msgrd_td || iocom->msgwr_td) { 112 wakeup(&iocom->msg_ctl); 113 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 114 } 115 116 /* 117 * Drop communications descriptor 118 */ 119 if (iocom->msg_fp) { 120 fdrop(iocom->msg_fp); 121 iocom->msg_fp = NULL; 122 } 123 124 /* 125 * Setup new communications descriptor 126 */ 127 iocom->msg_ctl = 0; 128 iocom->msg_fp = fp; 129 iocom->msg_seq = 0; 130 iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC; 131 132 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td, 133 NULL, 0, -1, "%s-msgrd", subsysname); 134 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td, 135 NULL, 0, -1, "%s-msgwr", subsysname); 136 lockmgr(&iocom->msglk, LK_RELEASE); 137 } 138 139 /* 140 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls 141 * this function to handle the state machine for LNK_CONN and LNK_SPAN. 
142 */ 143 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 144 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 145 146 void 147 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom, 148 void (*auto_callback)(kdmsg_msg_t *msg)) 149 { 150 kdmsg_msg_t *msg; 151 152 iocom->auto_callback = auto_callback; 153 154 msg = kdmsg_msg_alloc(&iocom->state0, 155 DMSG_LNK_CONN | DMSGF_CREATE, 156 kdmsg_lnk_conn_reply, NULL); 157 iocom->auto_lnk_conn.head = msg->any.head; 158 msg->any.lnk_conn = iocom->auto_lnk_conn; 159 iocom->conn_state = msg->state; 160 kdmsg_msg_write(msg); 161 } 162 163 static 164 int 165 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 166 { 167 kdmsg_iocom_t *iocom = state->iocom; 168 kdmsg_msg_t *rmsg; 169 170 /* 171 * Upon receipt of the LNK_CONN acknowledgement initiate an 172 * automatic SPAN if we were asked to. Used by e.g. xdisk, but 173 * not used by HAMMER2 which must manage more than one transmitted 174 * SPAN. 175 */ 176 if ((msg->any.head.cmd & DMSGF_CREATE) && 177 (iocom->flags & KDMSG_IOCOMF_AUTOTXSPAN)) { 178 rmsg = kdmsg_msg_alloc(&iocom->state0, 179 DMSG_LNK_SPAN | DMSGF_CREATE, 180 kdmsg_lnk_span_reply, NULL); 181 iocom->auto_lnk_span.head = rmsg->any.head; 182 rmsg->any.lnk_span = iocom->auto_lnk_span; 183 kdmsg_msg_write(rmsg); 184 } 185 186 /* 187 * Process shim after the CONN is acknowledged and before the CONN 188 * transaction is deleted. For deletions this gives device drivers 189 * the ability to interlock new operations on the circuit before 190 * it becomes illegal and panics. 
191 */ 192 if (iocom->auto_callback) 193 iocom->auto_callback(msg); 194 195 if ((state->txcmd & DMSGF_DELETE) == 0 && 196 (msg->any.head.cmd & DMSGF_DELETE)) { 197 iocom->conn_state = NULL; 198 kdmsg_msg_reply(msg, 0); 199 } 200 201 return (0); 202 } 203 204 static 205 int 206 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 207 { 208 /* 209 * Be sure to process shim before terminating the SPAN 210 * transaction. Gives device drivers the ability to 211 * interlock new operations on the circuit before it 212 * becomes illegal and panics. 213 */ 214 if (state->iocom->auto_callback) 215 state->iocom->auto_callback(msg); 216 217 if ((state->txcmd & DMSGF_DELETE) == 0 && 218 (msg->any.head.cmd & DMSGF_DELETE)) { 219 kdmsg_msg_reply(msg, 0); 220 } 221 return (0); 222 } 223 224 /* 225 * Disconnect and clean up 226 */ 227 void 228 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom) 229 { 230 kdmsg_state_t *state; 231 232 /* 233 * Ask the cluster controller to go away 234 */ 235 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 236 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL); 237 238 while (iocom->msgrd_td || iocom->msgwr_td) { 239 wakeup(&iocom->msg_ctl); 240 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 241 } 242 243 /* 244 * Cleanup caches 245 */ 246 if ((state = iocom->freerd_state) != NULL) { 247 iocom->freerd_state = NULL; 248 kdmsg_state_free(state); 249 } 250 251 if ((state = iocom->freewr_state) != NULL) { 252 iocom->freewr_state = NULL; 253 kdmsg_state_free(state); 254 } 255 256 /* 257 * Drop communications descriptor 258 */ 259 if (iocom->msg_fp) { 260 fdrop(iocom->msg_fp); 261 iocom->msg_fp = NULL; 262 } 263 lockmgr(&iocom->msglk, LK_RELEASE); 264 } 265 266 /* 267 * Cluster controller thread. Perform messaging functions. We have one 268 * thread for the reader and one for the writer. The writer handles 269 * shutdown requests (which should break the reader thread). 
270 */ 271 static 272 void 273 kdmsg_iocom_thread_rd(void *arg) 274 { 275 kdmsg_iocom_t *iocom = arg; 276 dmsg_hdr_t hdr; 277 kdmsg_msg_t *msg = NULL; 278 size_t hbytes; 279 size_t abytes; 280 int error = 0; 281 282 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) { 283 /* 284 * Retrieve the message from the pipe or socket. 285 */ 286 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr), 287 NULL, 1, UIO_SYSSPACE); 288 if (error) 289 break; 290 if (hdr.magic != DMSG_HDR_MAGIC) { 291 kprintf("kdmsg: bad magic: %04x\n", hdr.magic); 292 error = EINVAL; 293 break; 294 } 295 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN; 296 if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) { 297 kprintf("kdmsg: bad header size %zd\n", hbytes); 298 error = EINVAL; 299 break; 300 } 301 302 /* XXX messy: mask cmd to avoid allocating state */ 303 msg = kdmsg_msg_alloc(&iocom->state0, 304 hdr.cmd & DMSGF_BASECMDMASK, 305 NULL, NULL); 306 msg->any.head = hdr; 307 msg->hdr_size = hbytes; 308 if (hbytes > sizeof(hdr)) { 309 error = fp_read(iocom->msg_fp, &msg->any.head + 1, 310 hbytes - sizeof(hdr), 311 NULL, 1, UIO_SYSSPACE); 312 if (error) { 313 kprintf("kdmsg: short msg received\n"); 314 error = EINVAL; 315 break; 316 } 317 } 318 msg->aux_size = hdr.aux_bytes; 319 if (msg->aux_size > DMSG_AUX_MAX) { 320 kprintf("kdmsg: illegal msg payload size %zd\n", 321 msg->aux_size); 322 error = EINVAL; 323 break; 324 } 325 if (msg->aux_size) { 326 abytes = DMSG_DOALIGN(msg->aux_size); 327 msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK); 328 msg->flags |= KDMSG_FLAG_AUXALLOC; 329 error = fp_read(iocom->msg_fp, msg->aux_data, 330 abytes, NULL, 1, UIO_SYSSPACE); 331 if (error) { 332 kprintf("kdmsg: short msg payload received\n"); 333 break; 334 } 335 } 336 337 error = kdmsg_msg_receive_handling(msg); 338 msg = NULL; 339 } 340 341 if (error) 342 kprintf("kdmsg: read failed error %d\n", error); 343 344 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 345 if (msg) 346 kdmsg_msg_free(msg); 347 348 /* 349 * 
Shutdown the socket before waiting for the transmit side. 350 * 351 * If we are dying due to e.g. a socket disconnect verses being 352 * killed explicity we have to set KILL in order to kick the tx 353 * side when it might not have any other work to do. KILL might 354 * already be set if we are in an unmount or reconnect. 355 */ 356 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 357 358 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL); 359 wakeup(&iocom->msg_ctl); 360 361 /* 362 * Wait for the transmit side to drain remaining messages 363 * before cleaning up the rx state. The transmit side will 364 * set KILLTX and wait for the rx side to completely finish 365 * (set msgrd_td to NULL) before cleaning up any remaining 366 * tx states. 367 */ 368 lockmgr(&iocom->msglk, LK_RELEASE); 369 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 370 wakeup(&iocom->msg_ctl); 371 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) { 372 wakeup(&iocom->msg_ctl); 373 tsleep(iocom, 0, "clstrkw", hz); 374 } 375 376 iocom->msgrd_td = NULL; 377 378 /* 379 * iocom can be ripped out from under us at this point but 380 * wakeup() is safe. 381 */ 382 wakeup(iocom); 383 lwkt_exit(); 384 } 385 386 static 387 void 388 kdmsg_iocom_thread_wr(void *arg) 389 { 390 kdmsg_iocom_t *iocom = arg; 391 kdmsg_msg_t *msg; 392 kdmsg_state_t *state; 393 ssize_t res; 394 size_t abytes; 395 int error = 0; 396 int retries = 20; 397 398 /* 399 * Transmit loop 400 */ 401 msg = NULL; 402 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 403 404 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) { 405 /* 406 * Sleep if no messages pending. Interlock with flag while 407 * holding msglk. 
408 */ 409 if (TAILQ_EMPTY(&iocom->msgq)) { 410 atomic_set_int(&iocom->msg_ctl, 411 KDMSG_CLUSTERCTL_SLEEPING); 412 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz); 413 atomic_clear_int(&iocom->msg_ctl, 414 KDMSG_CLUSTERCTL_SLEEPING); 415 } 416 417 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 418 /* 419 * Remove msg from the transmit queue and do 420 * persist and half-closed state handling. 421 */ 422 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 423 lockmgr(&iocom->msglk, LK_RELEASE); 424 425 error = kdmsg_state_msgtx(msg); 426 if (error == EALREADY) { 427 error = 0; 428 kdmsg_msg_free(msg); 429 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 430 continue; 431 } 432 if (error) { 433 kdmsg_msg_free(msg); 434 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 435 break; 436 } 437 438 /* 439 * Dump the message to the pipe or socket. 440 * 441 * We have to clean up the message as if the transmit 442 * succeeded even if it failed. 443 */ 444 error = fp_write(iocom->msg_fp, &msg->any, 445 msg->hdr_size, &res, UIO_SYSSPACE); 446 if (error || res != msg->hdr_size) { 447 if (error == 0) 448 error = EINVAL; 449 kdmsg_state_cleanuptx(msg); 450 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 451 break; 452 } 453 if (msg->aux_size) { 454 abytes = DMSG_DOALIGN(msg->aux_size); 455 error = fp_write(iocom->msg_fp, 456 msg->aux_data, abytes, 457 &res, UIO_SYSSPACE); 458 if (error || res != abytes) { 459 if (error == 0) 460 error = EINVAL; 461 kdmsg_state_cleanuptx(msg); 462 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 463 break; 464 } 465 } 466 kdmsg_state_cleanuptx(msg); 467 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 468 } 469 } 470 471 /* 472 * Cleanup messages pending transmission and release msgq lock. 473 */ 474 if (error) 475 kprintf("kdmsg: write failed error %d\n", error); 476 kprintf("thread_wr: Terminating iocom\n"); 477 478 /* 479 * Shutdown the socket. This will cause the rx thread to get an 480 * EOF and ensure that both threads get to a termination state. 
481 */ 482 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 483 484 /* 485 * Set KILLTX (which the rx side waits for), then wait for the RX 486 * side to completely finish before we clean out any remaining 487 * command states. 488 */ 489 lockmgr(&iocom->msglk, LK_RELEASE); 490 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX); 491 wakeup(&iocom->msg_ctl); 492 while (iocom->msgrd_td) { 493 wakeup(&iocom->msg_ctl); 494 tsleep(iocom, 0, "clstrkw", hz); 495 } 496 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 497 498 /* 499 * Simulate received MSGF_DELETE's for any remaining states. 500 * (For remote masters). 501 * 502 * Drain the message queue to handle any device initiated writes 503 * due to state callbacks. 504 */ 505 cleanuprd: 506 kdmsg_drain_msgq(iocom); 507 RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) { 508 if ((state->rxcmd & DMSGF_DELETE) == 0) { 509 lockmgr(&iocom->msglk, LK_RELEASE); 510 kdmsg_state_abort(state); 511 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 512 goto cleanuprd; 513 } 514 } 515 516 /* 517 * Simulate received MSGF_DELETE's for any remaining states. 518 * (For local masters). 
519 */ 520 cleanupwr: 521 kdmsg_drain_msgq(iocom); 522 RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) { 523 if ((state->rxcmd & DMSGF_DELETE) == 0) { 524 lockmgr(&iocom->msglk, LK_RELEASE); 525 kdmsg_state_abort(state); 526 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 527 goto cleanupwr; 528 } 529 } 530 531 /* 532 * Retry until all work is done 533 */ 534 if (--retries == 0) 535 panic("kdmsg: comm thread shutdown couldn't drain"); 536 if (TAILQ_FIRST(&iocom->msgq) || 537 RB_ROOT(&iocom->staterd_tree) || 538 RB_ROOT(&iocom->statewr_tree)) { 539 goto cleanuprd; 540 } 541 iocom->flags |= KDMSG_IOCOMF_EXITNOACC; 542 543 lockmgr(&iocom->msglk, LK_RELEASE); 544 545 /* 546 * The state trees had better be empty now 547 */ 548 KKASSERT(RB_EMPTY(&iocom->staterd_tree)); 549 KKASSERT(RB_EMPTY(&iocom->statewr_tree)); 550 KKASSERT(iocom->conn_state == NULL); 551 552 if (iocom->exit_func) { 553 /* 554 * iocom is invalid after we call the exit function. 555 */ 556 iocom->msgwr_td = NULL; 557 iocom->exit_func(iocom); 558 } else { 559 /* 560 * iocom can be ripped out from under us once msgwr_td is 561 * set to NULL. The wakeup is safe. 562 */ 563 iocom->msgwr_td = NULL; 564 wakeup(iocom); 565 } 566 lwkt_exit(); 567 } 568 569 /* 570 * This cleans out the pending transmit message queue, adjusting any 571 * persistent states properly in the process. 572 * 573 * Caller must hold pmp->iocom.msglk 574 */ 575 void 576 kdmsg_drain_msgq(kdmsg_iocom_t *iocom) 577 { 578 kdmsg_msg_t *msg; 579 580 /* 581 * Clean out our pending transmit queue, executing the 582 * appropriate state adjustments. If this tries to open 583 * any new outgoing transactions we have to loop up and 584 * clean them out. 
585 */ 586 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 587 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 588 lockmgr(&iocom->msglk, LK_RELEASE); 589 if (kdmsg_state_msgtx(msg)) 590 kdmsg_msg_free(msg); 591 else 592 kdmsg_state_cleanuptx(msg); 593 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 594 } 595 } 596 597 /* 598 * Do all processing required to handle a freshly received message 599 * after its low level header has been validated. 600 */ 601 static 602 int 603 kdmsg_msg_receive_handling(kdmsg_msg_t *msg) 604 { 605 kdmsg_iocom_t *iocom = msg->state->iocom; 606 int error; 607 608 /* 609 * State machine tracking, state assignment for msg, 610 * returns error and discard status. Errors are fatal 611 * to the connection except for EALREADY which forces 612 * a discard without execution. 613 */ 614 error = kdmsg_state_msgrx(msg); 615 if (error) { 616 /* 617 * Raw protocol or connection error 618 */ 619 kdmsg_msg_free(msg); 620 if (error == EALREADY) 621 error = 0; 622 } else if (msg->state && msg->state->func) { 623 /* 624 * Message related to state which already has a 625 * handling function installed for it. 626 */ 627 error = msg->state->func(msg->state, msg); 628 kdmsg_state_cleanuprx(msg); 629 } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) { 630 error = kdmsg_autorxmsg(msg); 631 kdmsg_state_cleanuprx(msg); 632 } else { 633 error = iocom->rcvmsg(msg); 634 kdmsg_state_cleanuprx(msg); 635 } 636 return error; 637 } 638 639 /* 640 * Process state tracking for a message after reception, prior to 641 * execution. 642 * 643 * Called with msglk held and the msg dequeued. 644 * 645 * All messages are called with dummy state and return actual state. 646 * (One-off messages often just return the same dummy state). 647 * 648 * May request that caller discard the message by setting *discardp to 1. 649 * The returned state is not used in this case and is allowed to be NULL. 
 *
 * --
 *
 * These routines handle persistent and command/reply message state via the
 * CREATE and DELETE flags.  The first message in a command or reply sequence
 * sets CREATE, the last message in a command or reply sequence sets DELETE.
 *
 * There can be any number of intermediate messages belonging to the same
 * sequence sent in between the CREATE message and the DELETE message,
 * which set neither flag.  This represents a streaming command or reply.
 *
 * Any command message received with CREATE set expects a reply sequence to
 * be returned.  Reply sequences work the same as command sequences except the
 * REPLY bit is also set.  Both the command side and reply side can
 * degenerate into a single message with both CREATE and DELETE set.  Note
 * that one side can be streaming and the other side not, or neither, or both.
 *
 * The msgid is unique for the initiator.  That is, two sides sending a new
 * message can use the same msgid without colliding.
 *
 * --
 *
 * ABORT sequences work by setting the ABORT flag along with normal message
 * state.  However, ABORTs can also be sent on half-closed messages, that is
 * even if the command or reply side has already sent a DELETE, as long as
 * the message has not been fully closed it can still send an ABORT+DELETE
 * to terminate the half-closed message state.
 *
 * Since ABORT+DELETEs can race we silently discard ABORTs for message
 * state which has already been fully closed.  REPLY+ABORT+DELETEs can
 * also race, and in this situation the other side might have already
 * initiated a new unrelated command with the same message id.  Since
 * the abort has not set the CREATE flag the situation can be detected
 * and the message will also be discarded.
 *
 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
686 * The ABORT request is essentially integrated into the command instead 687 * of being sent later on. In this situation the command implementation 688 * detects that CREATE and ABORT are both set (vs ABORT alone) and can 689 * special-case non-blocking operation for the command. 690 * 691 * NOTE! Messages with ABORT set without CREATE or DELETE are considered 692 * to be mid-stream aborts for command/reply sequences. ABORTs on 693 * one-way messages are not supported. 694 * 695 * NOTE! If a command sequence does not support aborts the ABORT flag is 696 * simply ignored. 697 * 698 * -- 699 * 700 * One-off messages (no reply expected) are sent with neither CREATE or DELETE 701 * set. One-off messages cannot be aborted and typically aren't processed 702 * by these routines. The REPLY bit can be used to distinguish whether a 703 * one-off message is a command or reply. For example, one-off replies 704 * will typically just contain status updates. 705 */ 706 static 707 int 708 kdmsg_state_msgrx(kdmsg_msg_t *msg) 709 { 710 kdmsg_iocom_t *iocom = msg->state->iocom; 711 kdmsg_state_t *state; 712 kdmsg_state_t *pstate; 713 kdmsg_state_t sdummy; 714 int error; 715 716 /* 717 * Make sure a state structure is ready to go in case we need a new 718 * one. This is the only routine which uses freerd_state so no 719 * races are possible. 720 */ 721 if ((state = iocom->freerd_state) == NULL) { 722 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 723 state->flags = KDMSG_STATE_DYNAMIC; 724 state->iocom = iocom; 725 TAILQ_INIT(&state->subq); 726 iocom->freerd_state = state; 727 } 728 729 /* 730 * Lock RB tree and locate existing persistent state, if any. 731 * 732 * If received msg is a command state is on staterd_tree. 733 * If received msg is a reply state is on statewr_tree. 
734 */ 735 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 736 737 sdummy.msgid = msg->any.head.msgid; 738 sdummy.iocom = iocom; 739 if (msg->any.head.cmd & DMSGF_REVTRANS) { 740 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, 741 &sdummy); 742 } else { 743 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, 744 &sdummy); 745 } 746 if (state == NULL) 747 state = &iocom->state0; 748 msg->state = state; 749 750 /* 751 * Short-cut one-off or mid-stream messages. 752 */ 753 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 754 DMSGF_ABORT)) == 0) { 755 error = 0; 756 goto done; 757 } 758 759 /* 760 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 761 * inside the case statements. 762 */ 763 switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) { 764 case DMSGF_CREATE: 765 case DMSGF_CREATE | DMSGF_DELETE: 766 /* 767 * New persistant command received. 768 */ 769 if (state != &iocom->state0) { 770 kprintf("kdmsg_state_msgrx: duplicate transaction\n"); 771 error = EINVAL; 772 break; 773 } 774 775 /* 776 * Lookup the circuit. The circuit is an open transaction. 777 * the REVCIRC bit in the message tells us which side 778 * initiated the transaction representing the circuit. 
779 */ 780 if (msg->any.head.circuit) { 781 sdummy.msgid = msg->any.head.circuit; 782 783 if (msg->any.head.cmd & DMSGF_REVCIRC) { 784 pstate = RB_FIND(kdmsg_state_tree, 785 &iocom->statewr_tree, 786 &sdummy); 787 } else { 788 pstate = RB_FIND(kdmsg_state_tree, 789 &iocom->staterd_tree, 790 &sdummy); 791 } 792 if (pstate == NULL) { 793 kprintf("kdmsg_state_msgrx: " 794 "missing parent in stacked trans\n"); 795 error = EINVAL; 796 break; 797 } 798 } else { 799 pstate = &iocom->state0; 800 } 801 802 /* 803 * Allocate new state 804 */ 805 state = iocom->freerd_state; 806 iocom->freerd_state = NULL; 807 808 msg->state = state; 809 state->parent = pstate; 810 KKASSERT(state->iocom == iocom); 811 state->flags |= KDMSG_STATE_INSERTED | 812 KDMSG_STATE_OPPOSITE; 813 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 814 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 815 state->txcmd = DMSGF_REPLY; 816 state->msgid = msg->any.head.msgid; 817 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state); 818 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 819 error = 0; 820 break; 821 case DMSGF_DELETE: 822 /* 823 * Persistent state is expected but might not exist if an 824 * ABORT+DELETE races the close. 825 */ 826 if (state == &iocom->state0) { 827 if (msg->any.head.cmd & DMSGF_ABORT) { 828 error = EALREADY; 829 } else { 830 kprintf("kdmsg_state_msgrx: " 831 "no state for DELETE\n"); 832 error = EINVAL; 833 } 834 break; 835 } 836 837 /* 838 * Handle another ABORT+DELETE case if the msgid has already 839 * been reused. 840 */ 841 if ((state->rxcmd & DMSGF_CREATE) == 0) { 842 if (msg->any.head.cmd & DMSGF_ABORT) { 843 error = EALREADY; 844 } else { 845 kprintf("kdmsg_state_msgrx: " 846 "state reused for DELETE\n"); 847 error = EINVAL; 848 } 849 break; 850 } 851 error = 0; 852 break; 853 default: 854 /* 855 * Check for mid-stream ABORT command received, otherwise 856 * allow. 
857 */ 858 if (msg->any.head.cmd & DMSGF_ABORT) { 859 if (state == &iocom->state0 || 860 (state->rxcmd & DMSGF_CREATE) == 0) { 861 error = EALREADY; 862 break; 863 } 864 } 865 error = 0; 866 break; 867 case DMSGF_REPLY | DMSGF_CREATE: 868 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 869 /* 870 * When receiving a reply with CREATE set the original 871 * persistent state message should already exist. 872 */ 873 if (state == &iocom->state0) { 874 kprintf("kdmsg_state_msgrx: no state match for " 875 "REPLY cmd=%08x msgid=%016jx\n", 876 msg->any.head.cmd, 877 (intmax_t)msg->any.head.msgid); 878 error = EINVAL; 879 break; 880 } 881 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 882 error = 0; 883 break; 884 case DMSGF_REPLY | DMSGF_DELETE: 885 /* 886 * Received REPLY+ABORT+DELETE in case where msgid has 887 * already been fully closed, ignore the message. 888 */ 889 if (state == &iocom->state0) { 890 if (msg->any.head.cmd & DMSGF_ABORT) { 891 error = EALREADY; 892 } else { 893 kprintf("kdmsg_state_msgrx: no state match " 894 "for REPLY|DELETE\n"); 895 error = EINVAL; 896 } 897 break; 898 } 899 900 /* 901 * Received REPLY+ABORT+DELETE in case where msgid has 902 * already been reused for an unrelated message, 903 * ignore the message. 904 */ 905 if ((state->rxcmd & DMSGF_CREATE) == 0) { 906 if (msg->any.head.cmd & DMSGF_ABORT) { 907 error = EALREADY; 908 } else { 909 kprintf("kdmsg_state_msgrx: state reused " 910 "for REPLY|DELETE\n"); 911 error = EINVAL; 912 } 913 break; 914 } 915 error = 0; 916 break; 917 case DMSGF_REPLY: 918 /* 919 * Check for mid-stream ABORT reply received to sent command. 920 */ 921 if (msg->any.head.cmd & DMSGF_ABORT) { 922 if (state == &iocom->state0 || 923 (state->rxcmd & DMSGF_CREATE) == 0) { 924 error = EALREADY; 925 break; 926 } 927 } 928 error = 0; 929 break; 930 } 931 932 /* 933 * Calculate the easy-switch() transactional command. 
Represents 934 * the outer-transaction command for any transaction-create or 935 * transaction-delete, and the inner message command for any 936 * non-transaction or inside-transaction command. tcmd will be 937 * set to 0 for any messaging error condition. 938 * 939 * The two can be told apart because outer-transaction commands 940 * always have a DMSGF_CREATE and/or DMSGF_DELETE flag. 941 */ 942 done: 943 lockmgr(&iocom->msglk, LK_RELEASE); 944 945 if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) { 946 if (state != &iocom->state0) { 947 msg->tcmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 948 (msg->any.head.cmd & (DMSGF_CREATE | 949 DMSGF_DELETE | 950 DMSGF_REPLY)); 951 } else { 952 msg->tcmd = 0; 953 } 954 } else { 955 msg->tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK; 956 } 957 return (error); 958 } 959 960 /* 961 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set. 962 * This routine must call iocom->rcvmsg() for anything not automatically 963 * handled. 964 */ 965 static int 966 kdmsg_autorxmsg(kdmsg_msg_t *msg) 967 { 968 kdmsg_iocom_t *iocom = msg->state->iocom; 969 int error = 0; 970 uint32_t cmd; 971 972 /* 973 * Main switch processes transaction create/delete sequences only. 974 * Use icmd (DELETEs use DMSG_LNK_ERROR 975 * 976 * NOTE: If processing in-transaction messages you generally want 977 * an inner switch on msg->any.head.cmd. 978 */ 979 if (msg->state) { 980 cmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 981 (msg->any.head.cmd & (DMSGF_CREATE | 982 DMSGF_DELETE | 983 DMSGF_REPLY)); 984 } else { 985 cmd = 0; 986 } 987 988 switch(cmd) { 989 case DMSG_LNK_CONN | DMSGF_CREATE: 990 case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE: 991 /* 992 * Received LNK_CONN transaction. Transmit response and 993 * leave transaction open, which allows the other end to 994 * start to the SPAN protocol. 995 * 996 * Handle shim after acknowledging the CONN. 
997 */ 998 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 999 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1000 kdmsg_msg_result(msg, 0); 1001 if (iocom->auto_callback) 1002 iocom->auto_callback(msg); 1003 } else { 1004 error = iocom->rcvmsg(msg); 1005 } 1006 break; 1007 } 1008 /* fall through */ 1009 case DMSG_LNK_CONN | DMSGF_DELETE: 1010 /* 1011 * This message is usually simulated after a link is lost 1012 * to clean up the transaction. 1013 */ 1014 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1015 if (iocom->auto_callback) 1016 iocom->auto_callback(msg); 1017 kdmsg_msg_reply(msg, 0); 1018 } else { 1019 error = iocom->rcvmsg(msg); 1020 } 1021 break; 1022 case DMSG_LNK_SPAN | DMSGF_CREATE: 1023 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE: 1024 /* 1025 * Received LNK_SPAN transaction. We do not have to respond 1026 * (except on termination), but we must leave the transaction 1027 * open. 1028 * 1029 * Handle shim after acknowledging the SPAN. 1030 */ 1031 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1032 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1033 if (iocom->auto_callback) 1034 iocom->auto_callback(msg); 1035 break; 1036 } 1037 /* fall through */ 1038 } else { 1039 error = iocom->rcvmsg(msg); 1040 break; 1041 } 1042 /* fall through */ 1043 case DMSG_LNK_SPAN | DMSGF_DELETE: 1044 /* 1045 * Process shims (auto_callback) before cleaning up the 1046 * circuit structure and closing the transactions. Device 1047 * driver should ensure that the circuit is not used after 1048 * the auto_callback() returns. 1049 * 1050 * Handle shim before closing the SPAN transaction. 1051 */ 1052 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1053 if (iocom->auto_callback) 1054 iocom->auto_callback(msg); 1055 kdmsg_msg_reply(msg, 0); 1056 } else { 1057 error = iocom->rcvmsg(msg); 1058 } 1059 break; 1060 default: 1061 /* 1062 * Anything unhandled goes into rcvmsg. 
1063 * 1064 * NOTE: Replies to link-level messages initiated by our side 1065 * are handled by the state callback, they are NOT 1066 * handled here. 1067 */ 1068 error = iocom->rcvmsg(msg); 1069 break; 1070 } 1071 return (error); 1072 } 1073 1074 /* 1075 * Post-receive-handling message and state cleanup. This routine is called 1076 * after the state function handling/callback to properly dispose of the 1077 * message and update or dispose of the state. 1078 */ 1079 static 1080 void 1081 kdmsg_state_cleanuprx(kdmsg_msg_t *msg) 1082 { 1083 kdmsg_iocom_t *iocom = msg->state->iocom; 1084 kdmsg_state_t *state; 1085 kdmsg_state_t *pstate; 1086 1087 if ((state = msg->state) == NULL) { 1088 kdmsg_msg_free(msg); 1089 } else if (msg->any.head.cmd & DMSGF_DELETE) { 1090 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1091 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0); 1092 state->rxcmd |= DMSGF_DELETE; 1093 if (state->txcmd & DMSGF_DELETE) { 1094 KKASSERT(state->flags & KDMSG_STATE_INSERTED); 1095 if (state->rxcmd & DMSGF_REPLY) { 1096 KKASSERT(msg->any.head.cmd & 1097 DMSGF_REPLY); 1098 RB_REMOVE(kdmsg_state_tree, 1099 &iocom->statewr_tree, state); 1100 } else { 1101 KKASSERT((msg->any.head.cmd & 1102 DMSGF_REPLY) == 0); 1103 RB_REMOVE(kdmsg_state_tree, 1104 &iocom->staterd_tree, state); 1105 } 1106 pstate = state->parent; 1107 TAILQ_REMOVE(&pstate->subq, state, entry); 1108 if (pstate != &pstate->iocom->state0 && 1109 TAILQ_EMPTY(&pstate->subq) && 1110 (pstate->flags & KDMSG_STATE_INSERTED) == 0) { 1111 kdmsg_state_free(pstate); 1112 } 1113 state->flags &= ~KDMSG_STATE_INSERTED; 1114 state->parent = NULL; 1115 kdmsg_msg_free(msg); 1116 if (TAILQ_EMPTY(&state->subq)) 1117 kdmsg_state_free(state); 1118 lockmgr(&iocom->msglk, LK_RELEASE); 1119 } else { 1120 kdmsg_msg_free(msg); 1121 lockmgr(&iocom->msglk, LK_RELEASE); 1122 } 1123 } else { 1124 kdmsg_msg_free(msg); 1125 } 1126 } 1127 1128 /* 1129 * Simulate receiving a message which terminates an active transaction 1130 * state. 
Our simulated received message must set DELETE and may also 1131 * have to set CREATE. It must also ensure that all fields are set such 1132 * that the receive handling code can find the state (kdmsg_state_msgrx()) 1133 * or an endless loop will ensue. 1134 * 1135 * This is used when the other end of the link is dead so the device driver 1136 * gets a completed transaction for all pending states. 1137 */ 1138 static 1139 void 1140 kdmsg_state_abort(kdmsg_state_t *state) 1141 { 1142 kdmsg_msg_t *msg; 1143 1144 /* 1145 * Prevent recursive aborts which could otherwise occur if the 1146 * simulated message reception runs state->func which then turns 1147 * around and tries to reply to a broken circuit when then calls 1148 * the state abort code again. 1149 */ 1150 if (state->flags & KDMSG_STATE_ABORTING) 1151 return; 1152 state->flags |= KDMSG_STATE_ABORTING; 1153 1154 /* 1155 * NOTE: Args to kdmsg_msg_alloc() to avoid dynamic state allocation. 1156 * 1157 * NOTE: We are simulating a received message using our state 1158 * (vs a message generated by the other side using its state), 1159 * so we must invert DMSGF_REVTRANS and DMSGF_REVCIRC. 1160 */ 1161 msg = kdmsg_msg_alloc(state, DMSG_LNK_ERROR, NULL, NULL); 1162 if ((state->rxcmd & DMSGF_CREATE) == 0) 1163 msg->any.head.cmd |= DMSGF_CREATE; 1164 msg->any.head.cmd |= DMSGF_DELETE | (state->rxcmd & DMSGF_REPLY); 1165 msg->any.head.cmd ^= (DMSGF_REVTRANS | DMSGF_REVCIRC); 1166 msg->any.head.error = DMSG_ERR_LOSTLINK; 1167 kdmsg_msg_receive_handling(msg); 1168 } 1169 1170 /* 1171 * Process state tracking for a message prior to transmission. 1172 * 1173 * Called with msglk held and the msg dequeued. Returns non-zero if 1174 * the message is bad and should be deleted by the caller. 1175 * 1176 * One-off messages are usually with dummy state and msg->state may be NULL 1177 * in this situation. 1178 * 1179 * New transactions (when CREATE is set) will insert the state. 
1180 * 1181 * May request that caller discard the message by setting *discardp to 1. 1182 * A NULL state may be returned in this case. 1183 */ 1184 static 1185 int 1186 kdmsg_state_msgtx(kdmsg_msg_t *msg) 1187 { 1188 kdmsg_iocom_t *iocom = msg->state->iocom; 1189 kdmsg_state_t *state; 1190 int error; 1191 1192 /* 1193 * Make sure a state structure is ready to go in case we need a new 1194 * one. This is the only routine which uses freewr_state so no 1195 * races are possible. 1196 */ 1197 if ((state = iocom->freewr_state) == NULL) { 1198 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1199 state->flags = KDMSG_STATE_DYNAMIC; 1200 state->iocom = iocom; 1201 iocom->freewr_state = state; 1202 } 1203 1204 /* 1205 * Lock RB tree. If persistent state is present it will have already 1206 * been assigned to msg. 1207 */ 1208 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1209 state = msg->state; 1210 1211 /* 1212 * Short-cut one-off or mid-stream messages (state may be NULL). 1213 */ 1214 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1215 DMSGF_ABORT)) == 0) { 1216 lockmgr(&iocom->msglk, LK_RELEASE); 1217 return(0); 1218 } 1219 1220 1221 /* 1222 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 1223 * inside the case statements. 1224 */ 1225 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1226 DMSGF_REPLY)) { 1227 case DMSGF_CREATE: 1228 case DMSGF_CREATE | DMSGF_DELETE: 1229 /* 1230 * Insert the new persistent message state and mark 1231 * half-closed if DELETE is set. Since this is a new 1232 * message it isn't possible to transition into the fully 1233 * closed state here. 1234 * 1235 * XXX state must be assigned and inserted by 1236 * kdmsg_msg_write(). txcmd is assigned by us 1237 * on-transmit. 
1238 */ 1239 KKASSERT(state != NULL); 1240 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 1241 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1242 state->rxcmd = DMSGF_REPLY; 1243 error = 0; 1244 break; 1245 case DMSGF_DELETE: 1246 /* 1247 * Sent ABORT+DELETE in case where msgid has already 1248 * been fully closed, ignore the message. 1249 */ 1250 if (state == &iocom->state0) { 1251 if (msg->any.head.cmd & DMSGF_ABORT) { 1252 error = EALREADY; 1253 } else { 1254 kprintf("kdmsg_state_msgtx: no state match " 1255 "for DELETE cmd=%08x msgid=%016jx\n", 1256 msg->any.head.cmd, 1257 (intmax_t)msg->any.head.msgid); 1258 error = EINVAL; 1259 } 1260 break; 1261 } 1262 1263 /* 1264 * Sent ABORT+DELETE in case where msgid has 1265 * already been reused for an unrelated message, 1266 * ignore the message. 1267 */ 1268 if ((state->txcmd & DMSGF_CREATE) == 0) { 1269 if (msg->any.head.cmd & DMSGF_ABORT) { 1270 error = EALREADY; 1271 } else { 1272 kprintf("kdmsg_state_msgtx: state reused " 1273 "for DELETE\n"); 1274 error = EINVAL; 1275 } 1276 break; 1277 } 1278 error = 0; 1279 break; 1280 default: 1281 /* 1282 * Check for mid-stream ABORT command sent 1283 */ 1284 if (msg->any.head.cmd & DMSGF_ABORT) { 1285 if (state == &state->iocom->state0 || 1286 (state->txcmd & DMSGF_CREATE) == 0) { 1287 error = EALREADY; 1288 break; 1289 } 1290 } 1291 error = 0; 1292 break; 1293 case DMSGF_REPLY | DMSGF_CREATE: 1294 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 1295 /* 1296 * When transmitting a reply with CREATE set the original 1297 * persistent state message should already exist. 
1298 */ 1299 if (state == &state->iocom->state0) { 1300 kprintf("kdmsg_state_msgtx: no state match " 1301 "for REPLY | CREATE\n"); 1302 error = EINVAL; 1303 break; 1304 } 1305 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1306 error = 0; 1307 break; 1308 case DMSGF_REPLY | DMSGF_DELETE: 1309 /* 1310 * When transmitting a reply with DELETE set the original 1311 * persistent state message should already exist. 1312 * 1313 * This is very similar to the REPLY|CREATE|* case except 1314 * txcmd is already stored, so we just add the DELETE flag. 1315 * 1316 * Sent REPLY+ABORT+DELETE in case where msgid has 1317 * already been fully closed, ignore the message. 1318 */ 1319 if (state == &state->iocom->state0) { 1320 if (msg->any.head.cmd & DMSGF_ABORT) { 1321 error = EALREADY; 1322 } else { 1323 kprintf("kdmsg_state_msgtx: no state match " 1324 "for REPLY | DELETE\n"); 1325 error = EINVAL; 1326 } 1327 break; 1328 } 1329 1330 /* 1331 * Sent REPLY+ABORT+DELETE in case where msgid has already 1332 * been reused for an unrelated message, ignore the message. 1333 */ 1334 if ((state->txcmd & DMSGF_CREATE) == 0) { 1335 if (msg->any.head.cmd & DMSGF_ABORT) { 1336 error = EALREADY; 1337 } else { 1338 kprintf("kdmsg_state_msgtx: state reused " 1339 "for REPLY | DELETE\n"); 1340 error = EINVAL; 1341 } 1342 break; 1343 } 1344 error = 0; 1345 break; 1346 case DMSGF_REPLY: 1347 /* 1348 * Check for mid-stream ABORT reply sent. 1349 * 1350 * One-off REPLY messages are allowed for e.g. status updates. 
1351 */ 1352 if (msg->any.head.cmd & DMSGF_ABORT) { 1353 if (state == &state->iocom->state0 || 1354 (state->txcmd & DMSGF_CREATE) == 0) { 1355 error = EALREADY; 1356 break; 1357 } 1358 } 1359 error = 0; 1360 break; 1361 } 1362 lockmgr(&iocom->msglk, LK_RELEASE); 1363 return (error); 1364 } 1365 1366 static 1367 void 1368 kdmsg_state_cleanuptx(kdmsg_msg_t *msg) 1369 { 1370 kdmsg_iocom_t *iocom = msg->state->iocom; 1371 kdmsg_state_t *state; 1372 kdmsg_state_t *pstate; 1373 1374 if ((state = msg->state) == NULL) { 1375 kdmsg_msg_free(msg); 1376 } else if (msg->any.head.cmd & DMSGF_DELETE) { 1377 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1378 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1379 state->txcmd |= DMSGF_DELETE; 1380 if (state->rxcmd & DMSGF_DELETE) { 1381 KKASSERT(state->flags & KDMSG_STATE_INSERTED); 1382 if (state->txcmd & DMSGF_REPLY) { 1383 KKASSERT(msg->any.head.cmd & 1384 DMSGF_REPLY); 1385 RB_REMOVE(kdmsg_state_tree, 1386 &iocom->staterd_tree, state); 1387 } else { 1388 KKASSERT((msg->any.head.cmd & 1389 DMSGF_REPLY) == 0); 1390 RB_REMOVE(kdmsg_state_tree, 1391 &iocom->statewr_tree, state); 1392 } 1393 pstate = state->parent; 1394 TAILQ_REMOVE(&pstate->subq, state, entry); 1395 if (pstate != &pstate->iocom->state0 && 1396 TAILQ_EMPTY(&pstate->subq) && 1397 (pstate->flags & KDMSG_STATE_INSERTED) == 0) { 1398 kdmsg_state_free(pstate); 1399 } 1400 state->flags &= ~KDMSG_STATE_INSERTED; 1401 state->parent = NULL; 1402 kdmsg_msg_free(msg); 1403 if (TAILQ_EMPTY(&state->subq)) 1404 kdmsg_state_free(state); 1405 lockmgr(&iocom->msglk, LK_RELEASE); 1406 } else { 1407 kdmsg_msg_free(msg); 1408 lockmgr(&iocom->msglk, LK_RELEASE); 1409 } 1410 } else { 1411 kdmsg_msg_free(msg); 1412 } 1413 } 1414 1415 static 1416 void 1417 kdmsg_state_free(kdmsg_state_t *state) 1418 { 1419 kdmsg_iocom_t *iocom = state->iocom; 1420 1421 KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0); 1422 kfree(state, iocom->mmsg); 1423 } 1424 1425 kdmsg_msg_t * 1426 
kdmsg_msg_alloc(kdmsg_state_t *state, uint32_t cmd, 1427 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data) 1428 { 1429 kdmsg_iocom_t *iocom = state->iocom; 1430 kdmsg_state_t *pstate; 1431 kdmsg_msg_t *msg; 1432 size_t hbytes; 1433 1434 KKASSERT(iocom != NULL); 1435 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN; 1436 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes, 1437 iocom->mmsg, M_WAITOK | M_ZERO); 1438 msg->hdr_size = hbytes; 1439 1440 if ((cmd & (DMSGF_CREATE | DMSGF_REPLY)) == DMSGF_CREATE) { 1441 /* 1442 * New transaction, requires tracking state and a unique 1443 * msgid to be allocated. 1444 */ 1445 pstate = state; 1446 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1447 TAILQ_INIT(&state->subq); 1448 state->iocom = iocom; 1449 state->parent = pstate; 1450 state->flags = KDMSG_STATE_DYNAMIC; 1451 state->func = func; 1452 state->any.any = data; 1453 state->msgid = (uint64_t)(uintptr_t)state; 1454 /*msg->any.head.msgid = state->msgid;XXX*/ 1455 1456 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1457 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state)) 1458 panic("duplicate msgid allocated"); 1459 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 1460 state->flags |= KDMSG_STATE_INSERTED; 1461 lockmgr(&iocom->msglk, LK_RELEASE); 1462 } else { 1463 pstate = state->parent; 1464 } 1465 1466 if (state->flags & KDMSG_STATE_OPPOSITE) 1467 cmd |= DMSGF_REVTRANS; 1468 if (pstate->flags & KDMSG_STATE_OPPOSITE) 1469 cmd |= DMSGF_REVCIRC; 1470 1471 msg->any.head.magic = DMSG_HDR_MAGIC; 1472 msg->any.head.cmd = cmd; 1473 msg->any.head.msgid = state->msgid; 1474 msg->any.head.circuit = pstate->msgid; 1475 msg->state = state; 1476 1477 return (msg); 1478 } 1479 1480 void 1481 kdmsg_msg_free(kdmsg_msg_t *msg) 1482 { 1483 kdmsg_iocom_t *iocom = msg->state->iocom; 1484 1485 if ((msg->flags & KDMSG_FLAG_AUXALLOC) && 1486 msg->aux_data && msg->aux_size) { 1487 kfree(msg->aux_data, iocom->mmsg); 1488 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1489 } 1490 
msg->state = NULL; 1491 msg->aux_data = NULL; 1492 msg->aux_size = 0; 1493 1494 kfree(msg, iocom->mmsg); 1495 } 1496 1497 /* 1498 * Indexed messages are stored in a red-black tree indexed by their 1499 * msgid. Only persistent messages are indexed. 1500 */ 1501 int 1502 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2) 1503 { 1504 if (state1->iocom < state2->iocom) 1505 return(-1); 1506 if (state1->iocom > state2->iocom) 1507 return(1); 1508 if (state1->msgid < state2->msgid) 1509 return(-1); 1510 if (state1->msgid > state2->msgid) 1511 return(1); 1512 return(0); 1513 } 1514 1515 /* 1516 * Write a message. All requisit command flags have been set. 1517 * 1518 * If msg->state is non-NULL the message is written to the existing 1519 * transaction. msgid will be set accordingly. 1520 * 1521 * If msg->state is NULL and CREATE is set new state is allocated and 1522 * (func, data) is installed. A msgid is assigned. 1523 * 1524 * If msg->state is NULL and CREATE is not set the message is assumed 1525 * to be a one-way message. The originator must assign the msgid 1526 * (or leave it 0, which is typical. 1527 * 1528 * This function merely queues the message to the management thread, it 1529 * does not write to the message socket/pipe. 1530 */ 1531 void 1532 kdmsg_msg_write(kdmsg_msg_t *msg) 1533 { 1534 kdmsg_iocom_t *iocom = msg->state->iocom; 1535 kdmsg_state_t *state; 1536 1537 if (msg->state) { 1538 /* 1539 * Continuance or termination of existing transaction. 1540 * The transaction could have been initiated by either end. 1541 * 1542 * (Function callback and aux data for the receive side can 1543 * be replaced or left alone). 1544 */ 1545 state = msg->state; 1546 msg->any.head.msgid = state->msgid; 1547 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1548 } else { 1549 /* 1550 * One-off message (always uses msgid 0 to distinguish 1551 * between a possibly lost in-transaction message due to 1552 * competing aborts and a real one-off message?) 
1553 */ 1554 state = NULL; 1555 msg->any.head.msgid = 0; 1556 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1557 } 1558 1559 /* 1560 * This flag is not set until after the tx thread has drained 1561 * the txmsgq and simulated responses. After that point the 1562 * txthread is dead and can no longer simulate responses. 1563 * 1564 * Device drivers should never try to send a message once this 1565 * flag is set. They should have detected (through the state 1566 * closures) that the link is in trouble. 1567 */ 1568 if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) { 1569 lockmgr(&iocom->msglk, LK_RELEASE); 1570 panic("kdmsg_msg_write: Attempt to write message to " 1571 "terminated iocom\n"); 1572 } 1573 1574 /* 1575 * Finish up the msg fields. Note that msg->aux_size and the 1576 * aux_bytes stored in the message header represent the unaligned 1577 * (actual) bytes of data, but the buffer is sized to an aligned 1578 * size and the CRC is generated over the aligned length. 1579 */ 1580 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255); 1581 ++iocom->msg_seq; 1582 1583 if (msg->aux_data && msg->aux_size) { 1584 uint32_t abytes = DMSG_DOALIGN(msg->aux_size); 1585 1586 msg->any.head.aux_bytes = msg->aux_size; 1587 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes); 1588 } 1589 msg->any.head.hdr_crc = 0; 1590 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size); 1591 1592 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry); 1593 1594 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { 1595 atomic_clear_int(&iocom->msg_ctl, 1596 KDMSG_CLUSTERCTL_SLEEPING); 1597 wakeup(&iocom->msg_ctl); 1598 } 1599 1600 lockmgr(&iocom->msglk, LK_RELEASE); 1601 } 1602 1603 /* 1604 * Reply to a message and terminate our side of the transaction. 1605 * 1606 * If msg->state is non-NULL we are replying to a one-way message. 
1607 */ 1608 void 1609 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error) 1610 { 1611 kdmsg_state_t *state = msg->state; 1612 kdmsg_msg_t *nmsg; 1613 uint32_t cmd; 1614 1615 /* 1616 * Reply with a simple error code and terminate the transaction. 1617 */ 1618 cmd = DMSG_LNK_ERROR; 1619 1620 /* 1621 * Check if our direction has even been initiated yet, set CREATE. 1622 * 1623 * Check what direction this is (command or reply direction). Note 1624 * that txcmd might not have been initiated yet. 1625 * 1626 * If our direction has already been closed we just return without 1627 * doing anything. 1628 */ 1629 if (state != &state->iocom->state0) { 1630 if (state->txcmd & DMSGF_DELETE) 1631 return; 1632 if ((state->txcmd & DMSGF_CREATE) == 0) 1633 cmd |= DMSGF_CREATE; 1634 if (state->txcmd & DMSGF_REPLY) 1635 cmd |= DMSGF_REPLY; 1636 cmd |= DMSGF_DELETE; 1637 } else { 1638 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 1639 cmd |= DMSGF_REPLY; 1640 } 1641 1642 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1643 nmsg->any.head.error = error; 1644 kdmsg_msg_write(nmsg); 1645 } 1646 1647 /* 1648 * Reply to a message and continue our side of the transaction. 1649 * 1650 * If msg->state is non-NULL we are replying to a one-way message and this 1651 * function degenerates into the same as kdmsg_msg_reply(). 1652 */ 1653 void 1654 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error) 1655 { 1656 kdmsg_state_t *state = msg->state; 1657 kdmsg_msg_t *nmsg; 1658 uint32_t cmd; 1659 1660 /* 1661 * Return a simple result code, do NOT terminate the transaction. 1662 */ 1663 cmd = DMSG_LNK_ERROR; 1664 1665 /* 1666 * Check if our direction has even been initiated yet, set CREATE. 1667 * 1668 * Check what direction this is (command or reply direction). Note 1669 * that txcmd might not have been initiated yet. 1670 * 1671 * If our direction has already been closed we just return without 1672 * doing anything. 
1673 */ 1674 if (state != &state->iocom->state0) { 1675 if (state->txcmd & DMSGF_DELETE) 1676 return; 1677 if ((state->txcmd & DMSGF_CREATE) == 0) 1678 cmd |= DMSGF_CREATE; 1679 if (state->txcmd & DMSGF_REPLY) 1680 cmd |= DMSGF_REPLY; 1681 /* continuing transaction, do not set MSGF_DELETE */ 1682 } else { 1683 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 1684 cmd |= DMSGF_REPLY; 1685 } 1686 1687 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1688 nmsg->any.head.error = error; 1689 kdmsg_msg_write(nmsg); 1690 } 1691 1692 /* 1693 * Reply to a message and terminate our side of the transaction. 1694 * 1695 * If msg->state is non-NULL we are replying to a one-way message. 1696 */ 1697 void 1698 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error) 1699 { 1700 kdmsg_msg_t *nmsg; 1701 uint32_t cmd; 1702 1703 /* 1704 * Reply with a simple error code and terminate the transaction. 1705 */ 1706 cmd = DMSG_LNK_ERROR; 1707 1708 /* 1709 * Check if our direction has even been initiated yet, set CREATE. 1710 * 1711 * Check what direction this is (command or reply direction). Note 1712 * that txcmd might not have been initiated yet. 1713 * 1714 * If our direction has already been closed we just return without 1715 * doing anything. 1716 */ 1717 KKASSERT(state); 1718 if (state->txcmd & DMSGF_DELETE) 1719 return; 1720 if ((state->txcmd & DMSGF_CREATE) == 0) 1721 cmd |= DMSGF_CREATE; 1722 if (state->txcmd & DMSGF_REPLY) 1723 cmd |= DMSGF_REPLY; 1724 cmd |= DMSGF_DELETE; 1725 1726 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1727 nmsg->any.head.error = error; 1728 kdmsg_msg_write(nmsg); 1729 } 1730 1731 /* 1732 * Reply to a message and continue our side of the transaction. 1733 * 1734 * If msg->state is non-NULL we are replying to a one-way message and this 1735 * function degenerates into the same as kdmsg_msg_reply(). 
1736 */ 1737 void 1738 kdmsg_state_result(kdmsg_state_t *state, uint32_t error) 1739 { 1740 kdmsg_msg_t *nmsg; 1741 uint32_t cmd; 1742 1743 /* 1744 * Return a simple result code, do NOT terminate the transaction. 1745 */ 1746 cmd = DMSG_LNK_ERROR; 1747 1748 /* 1749 * Check if our direction has even been initiated yet, set CREATE. 1750 * 1751 * Check what direction this is (command or reply direction). Note 1752 * that txcmd might not have been initiated yet. 1753 * 1754 * If our direction has already been closed we just return without 1755 * doing anything. 1756 */ 1757 KKASSERT(state); 1758 if (state->txcmd & DMSGF_DELETE) 1759 return; 1760 if ((state->txcmd & DMSGF_CREATE) == 0) 1761 cmd |= DMSGF_CREATE; 1762 if (state->txcmd & DMSGF_REPLY) 1763 cmd |= DMSGF_REPLY; 1764 /* continuing transaction, do not set MSGF_DELETE */ 1765 1766 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1767 nmsg->any.head.error = error; 1768 kdmsg_msg_write(nmsg); 1769 } 1770