1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 #include "spdk/crc32.h" 36 #include "spdk/endian.h" 37 #include "spdk/assert.h" 38 #include "spdk/thread.h" 39 #include "spdk/nvmf.h" 40 #include "spdk/nvmf_spec.h" 41 #include "spdk/sock.h" 42 #include "spdk/string.h" 43 #include "spdk/trace.h" 44 #include "spdk/util.h" 45 46 #include "nvmf_internal.h" 47 #include "transport.h" 48 49 #include "spdk_internal/log.h" 50 #include "spdk_internal/nvme_tcp.h" 51 52 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16 53 54 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE 131072 55 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE 131072 56 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM 64 /* Maximal c2h_data pdu number for ecah tqpair */ 57 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6 58 59 /* spdk nvmf related structure */ 60 enum spdk_nvmf_tcp_req_state { 61 62 /* The request is not currently in use */ 63 TCP_REQUEST_STATE_FREE = 0, 64 65 /* Initial state when request first received */ 66 TCP_REQUEST_STATE_NEW, 67 68 /* The request is queued until a data buffer is available. */ 69 TCP_REQUEST_STATE_NEED_BUFFER, 70 71 /* The request is currently transferring data from the host to the controller. */ 72 TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 73 74 /* The request is ready to execute at the block device */ 75 TCP_REQUEST_STATE_READY_TO_EXECUTE, 76 77 /* The request is currently executing at the block device */ 78 TCP_REQUEST_STATE_EXECUTING, 79 80 /* The request finished executing at the block device */ 81 TCP_REQUEST_STATE_EXECUTED, 82 83 /* The request is ready to send a completion */ 84 TCP_REQUEST_STATE_READY_TO_COMPLETE, 85 86 /* The request is currently transferring final pdus from the controller to the host. */ 87 TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 88 89 /* The request completed and can be marked free. */ 90 TCP_REQUEST_STATE_COMPLETED, 91 92 /* Terminator */ 93 TCP_REQUEST_NUM_STATES, 94 }; 95 96 static const char *spdk_nvmf_tcp_term_req_fes_str[] = { 97 "Invalid PDU Header Field", 98 "PDU Sequence Error", 99 "Header Digiest Error", 100 "Data Transfer Out of Range", 101 "R2T Limit Exceeded", 102 "Unsupported parameter", 103 }; 104 105 #define OBJECT_NVMF_TCP_IO 0x80 106 107 #define TRACE_GROUP_NVMF_TCP 0x5 108 #define TRACE_TCP_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0) 109 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1) 110 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2) 111 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3) 112 #define TRACE_TCP_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4) 113 #define TRACE_TCP_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5) 114 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6) 115 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7) 116 #define TRACE_TCP_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8) 117 #define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9) 118 #define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA) 119 #define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB) 120 121 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP) 122 { 123 spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r'); 124 spdk_trace_register_description("TCP_REQ_NEW", 125 TRACE_TCP_REQUEST_STATE_NEW, 126 OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, ""); 127 spdk_trace_register_description("TCP_REQ_NEED_BUFFER", 128 TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 129 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 130 spdk_trace_register_description("TCP_REQ_TX_H_TO_C", 131 TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 132 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 133 spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE", 134 TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 135 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 136 spdk_trace_register_description("TCP_REQ_EXECUTING", 137 TRACE_TCP_REQUEST_STATE_EXECUTING, 138 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 139 spdk_trace_register_description("TCP_REQ_EXECUTED", 140 TRACE_TCP_REQUEST_STATE_EXECUTED, 141 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 142 spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE", 143 TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 144 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 145 spdk_trace_register_description("TCP_REQ_TRANSFER_C2H", 146 TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 147 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 148 spdk_trace_register_description("TCP_REQ_COMPLETED", 149 TRACE_TCP_REQUEST_STATE_COMPLETED, 150 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 151 spdk_trace_register_description("TCP_WRITE_START", 152 TRACE_TCP_FLUSH_WRITEBUF_START, 153 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 154 spdk_trace_register_description("TCP_WRITE_DONE", 155 TRACE_TCP_FLUSH_WRITEBUF_DONE, 156 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 157 spdk_trace_register_description("TCP_READ_DONE", 158 TRACE_TCP_READ_FROM_SOCKET_DONE, 159 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 160 } 161 162 struct spdk_nvmf_tcp_req { 163 struct spdk_nvmf_request req; 164 struct spdk_nvme_cpl rsp; 165 struct spdk_nvme_cmd cmd; 166 167 /* In-capsule data buffer */ 168 uint8_t *buf; 169 170 bool data_from_pool; 171 bool has_incapsule_data; 172 173 /* transfer_tag */ 174 uint16_t ttag; 175 176 enum spdk_nvmf_tcp_req_state state; 177 178 void *buffers[SPDK_NVMF_MAX_SGL_ENTRIES]; 179 180 /* 181 * next_expected_r2t_offset is used when we receive the h2c_data PDU. 182 */ 183 uint32_t next_expected_r2t_offset; 184 uint32_t r2tl_remain; 185 186 /* 187 * c2h_data_offset is used when we send the c2h_data PDU. 188 */ 189 uint32_t c2h_data_offset; 190 uint32_t c2h_data_pdu_num; 191 192 struct spdk_dif_ctx dif_ctx; 193 bool dif_insert_or_strip; 194 uint32_t elba_length; 195 uint32_t orig_length; 196 197 TAILQ_ENTRY(spdk_nvmf_tcp_req) link; 198 TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link; 199 }; 200 201 struct spdk_nvmf_tcp_qpair { 202 struct spdk_nvmf_qpair qpair; 203 struct spdk_nvmf_tcp_poll_group *group; 204 struct spdk_nvmf_tcp_port *port; 205 struct spdk_sock *sock; 206 struct spdk_poller *flush_poller; 207 208 enum nvme_tcp_pdu_recv_state recv_state; 209 enum nvme_tcp_qpair_state state; 210 211 struct nvme_tcp_pdu pdu_in_progress; 212 213 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 214 TAILQ_HEAD(, nvme_tcp_pdu) free_queue; 215 216 struct nvme_tcp_pdu *pdu; 217 struct nvme_tcp_pdu *pdu_pool; 218 uint16_t free_pdu_num; 219 220 /* Queues to track the requests in all states */ 221 TAILQ_HEAD(, spdk_nvmf_tcp_req) state_queue[TCP_REQUEST_NUM_STATES]; 222 /* Number of requests in each state */ 223 int32_t state_cntr[TCP_REQUEST_NUM_STATES]; 224 225 TAILQ_HEAD(, spdk_nvmf_tcp_req) queued_c2h_data_tcp_req; 226 227 uint8_t cpda; 228 229 /* Array of size "max_queue_depth * InCapsuleDataSize" containing 230 * buffers to be used for in capsule data. 231 */ 232 void *buf; 233 void *bufs; 234 struct spdk_nvmf_tcp_req *req; 235 struct spdk_nvmf_tcp_req *reqs; 236 237 bool host_hdgst_enable; 238 bool host_ddgst_enable; 239 240 241 /* The maximum number of I/O outstanding on this connection at one time */ 242 uint16_t max_queue_depth; 243 244 245 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 246 uint32_t maxh2cdata; 247 248 uint32_t c2h_data_pdu_cnt; 249 250 /* IP address */ 251 char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 252 char target_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 253 254 /* IP port */ 255 uint16_t initiator_port; 256 uint16_t target_port; 257 258 /* Timer used to destroy qpair after detecting transport error issue if initiator does 259 * not close the connection. 260 */ 261 struct spdk_poller *timeout_poller; 262 263 TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; 264 }; 265 266 struct spdk_nvmf_tcp_poll_group { 267 struct spdk_nvmf_transport_poll_group group; 268 struct spdk_sock_group *sock_group; 269 270 /* Requests that are waiting to obtain a data buffer */ 271 TAILQ_HEAD(, spdk_nvmf_tcp_req) pending_data_buf_queue; 272 273 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; 274 }; 275 276 struct spdk_nvmf_tcp_port { 277 struct spdk_nvme_transport_id trid; 278 struct spdk_sock *listen_sock; 279 uint32_t ref; 280 TAILQ_ENTRY(spdk_nvmf_tcp_port) link; 281 }; 282 283 struct spdk_nvmf_tcp_transport { 284 struct spdk_nvmf_transport transport; 285 286 pthread_mutex_t lock; 287 288 TAILQ_HEAD(, spdk_nvmf_tcp_port) ports; 289 }; 290 291 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 292 struct spdk_nvmf_tcp_req *tcp_req); 293 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair); 294 295 static void 296 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, 297 enum spdk_nvmf_tcp_req_state state) 298 { 299 struct spdk_nvmf_qpair *qpair; 300 struct spdk_nvmf_tcp_qpair *tqpair; 301 302 qpair = tcp_req->req.qpair; 303 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 304 305 TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 306 tqpair->state_cntr[tcp_req->state]--; 307 assert(tqpair->state_cntr[tcp_req->state] >= 0); 308 309 TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link); 310 tqpair->state_cntr[state]++; 311 312 tcp_req->state = state; 313 } 314 315 static struct nvme_tcp_pdu * 316 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair) 317 { 318 struct nvme_tcp_pdu *pdu; 319 320 pdu = TAILQ_FIRST(&tqpair->free_queue); 321 if (!pdu) { 322 SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair); 323 abort(); 324 return NULL; 325 } 326 327 tqpair->free_pdu_num--; 328 TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq); 329 memset(pdu, 0, sizeof(*pdu)); 330 pdu->ref = 1; 331 332 return pdu; 333 } 334 335 static void 336 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 337 { 338 if (!pdu) { 339 return; 340 } 341 342 assert(pdu->ref > 0); 343 344 pdu->ref--; 345 if (pdu->ref == 0) { 346 tqpair->free_pdu_num++; 347 TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq); 348 } 349 } 350 351 static struct spdk_nvmf_tcp_req * 352 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair) 353 { 354 struct spdk_nvmf_tcp_req *tcp_req; 355 356 tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]); 357 if (!tcp_req) { 358 SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair); 359 return NULL; 360 } 361 362 memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd)); 363 memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp)); 364 tcp_req->next_expected_r2t_offset = 0; 365 tcp_req->r2tl_remain = 0; 366 tcp_req->c2h_data_offset = 0; 367 tcp_req->has_incapsule_data = false; 368 tcp_req->dif_insert_or_strip = false; 369 370 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 371 return tcp_req; 372 } 373 374 static void 375 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req) 376 { 377 struct spdk_nvmf_tcp_transport *ttransport; 378 379 if (!tcp_req) { 380 return; 381 } 382 383 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req); 384 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 385 struct spdk_nvmf_tcp_transport, transport); 386 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 387 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 388 } 389 390 static int 391 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req) 392 { 393 struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 394 395 nvmf_tcp_request_free(tcp_req); 396 397 return 0; 398 } 399 400 static void 401 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair, 402 enum spdk_nvmf_tcp_req_state state) 403 { 404 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 405 406 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) { 407 nvmf_tcp_request_free(tcp_req); 408 } 409 } 410 411 static void 412 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) 413 { 414 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 415 struct nvme_tcp_pdu *pdu, *tmp_pdu; 416 417 /* Free the pdus in the send_queue */ 418 TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) { 419 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 420 /* Also check the pdu type, we need to calculte the c2h_data_pdu_cnt later */ 421 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) { 422 assert(tqpair->c2h_data_pdu_cnt > 0); 423 tqpair->c2h_data_pdu_cnt--; 424 } 425 spdk_nvmf_tcp_pdu_put(tqpair, pdu); 426 } 427 428 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->queued_c2h_data_tcp_req, link, req_tmp) { 429 TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link); 430 } 431 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 432 433 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW); 434 435 /* Wipe the requests waiting for buffer from the global list */ 436 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link, 437 req_tmp) { 438 TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link); 439 } 440 441 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); 442 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); 443 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 444 } 445 446 static void 447 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair) 448 { 449 int i; 450 struct spdk_nvmf_tcp_req *tcp_req; 451 452 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid); 453 for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) { 454 SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]); 455 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) { 456 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->data_from_pool); 457 SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode); 458 } 459 } 460 } 461 462 static void 463 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair) 464 { 465 int err = 0; 466 467 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 468 469 spdk_poller_unregister(&tqpair->flush_poller); 470 spdk_sock_close(&tqpair->sock); 471 spdk_nvmf_tcp_cleanup_all_states(tqpair); 472 473 if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) { 474 SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair, 475 tqpair->free_pdu_num, 476 (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)); 477 err++; 478 } 479 480 if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) { 481 SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair, 482 tqpair->state_cntr[TCP_REQUEST_STATE_FREE], 483 tqpair->max_queue_depth); 484 err++; 485 } 486 487 if (tqpair->c2h_data_pdu_cnt != 0) { 488 SPDK_ERRLOG("tqpair(%p) free c2h_data_pdu cnt is %u but should be 0\n", tqpair, 489 tqpair->c2h_data_pdu_cnt); 490 err++; 491 } 492 493 if (err > 0) { 494 nvmf_tcp_dump_qpair_req_contents(tqpair); 495 } 496 free(tqpair->pdu); 497 free(tqpair->pdu_pool); 498 free(tqpair->req); 499 free(tqpair->reqs); 500 spdk_free(tqpair->buf); 501 spdk_free(tqpair->bufs); 502 free(tqpair); 503 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n"); 504 } 505 506 static int 507 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport) 508 { 509 struct spdk_nvmf_tcp_transport *ttransport; 510 511 assert(transport != NULL); 512 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 513 514 pthread_mutex_destroy(&ttransport->lock); 515 free(ttransport); 516 return 0; 517 } 518 519 static struct spdk_nvmf_transport * 520 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts) 521 { 522 struct spdk_nvmf_tcp_transport *ttransport; 523 uint32_t sge_count; 524 uint32_t min_shared_buffers; 525 526 ttransport = calloc(1, sizeof(*ttransport)); 527 if (!ttransport) { 528 return NULL; 529 } 530 531 TAILQ_INIT(&ttransport->ports); 532 533 ttransport->transport.ops = &spdk_nvmf_transport_tcp; 534 535 SPDK_NOTICELOG("*** TCP Transport Init ***\n"); 536 537 SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n" 538 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" 539 " max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" 540 " in_capsule_data_size=%d, max_aq_depth=%d\n" 541 " num_shared_buffers=%d, c2h_success=%d,\n" 542 " dif_insert_or_strip=%d, sock_priority=%d\n", 543 opts->max_queue_depth, 544 opts->max_io_size, 545 opts->max_qpairs_per_ctrlr, 546 opts->io_unit_size, 547 opts->in_capsule_data_size, 548 opts->max_aq_depth, 549 opts->num_shared_buffers, 550 opts->c2h_success, 551 opts->dif_insert_or_strip, 552 opts->sock_priority); 553 554 if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) { 555 SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n" 556 "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n", 557 opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY); 558 free(ttransport); 559 return NULL; 560 } 561 562 /* I/O unit size cannot be larger than max I/O size */ 563 if (opts->io_unit_size > opts->max_io_size) { 564 opts->io_unit_size = opts->max_io_size; 565 } 566 567 sge_count = opts->max_io_size / opts->io_unit_size; 568 if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) { 569 SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); 570 free(ttransport); 571 return NULL; 572 } 573 574 min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size; 575 if (min_shared_buffers > opts->num_shared_buffers) { 576 SPDK_ERRLOG("There are not enough buffers to satisfy" 577 "per-poll group caches for each thread. (%" PRIu32 ")" 578 "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); 579 SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); 580 spdk_nvmf_tcp_destroy(&ttransport->transport); 581 return NULL; 582 } 583 584 pthread_mutex_init(&ttransport->lock, NULL); 585 586 return &ttransport->transport; 587 } 588 589 static int 590 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid) 591 { 592 unsigned long long ull; 593 char *end = NULL; 594 595 ull = strtoull(trsvcid, &end, 10); 596 if (end == NULL || end == trsvcid || *end != '\0') { 597 return -1; 598 } 599 600 /* Valid TCP/IP port numbers are in [0, 65535] */ 601 if (ull > 65535) { 602 return -1; 603 } 604 605 return (int)ull; 606 } 607 608 /** 609 * Canonicalize a listen address trid. 610 */ 611 static int 612 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid, 613 const struct spdk_nvme_transport_id *trid) 614 { 615 int trsvcid_int; 616 617 trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid); 618 if (trsvcid_int < 0) { 619 return -EINVAL; 620 } 621 622 memset(canon_trid, 0, sizeof(*canon_trid)); 623 canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP; 624 canon_trid->adrfam = trid->adrfam; 625 snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr); 626 snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int); 627 628 return 0; 629 } 630 631 /** 632 * Find an existing listening port. 633 * 634 * Caller must hold ttransport->lock. 635 */ 636 static struct spdk_nvmf_tcp_port * 637 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport, 638 const struct spdk_nvme_transport_id *trid) 639 { 640 struct spdk_nvme_transport_id canon_trid; 641 struct spdk_nvmf_tcp_port *port; 642 643 if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) { 644 return NULL; 645 } 646 647 TAILQ_FOREACH(port, &ttransport->ports, link) { 648 if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) { 649 return port; 650 } 651 } 652 653 return NULL; 654 } 655 656 static int 657 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport, 658 const struct spdk_nvme_transport_id *trid) 659 { 660 struct spdk_nvmf_tcp_transport *ttransport; 661 struct spdk_nvmf_tcp_port *port; 662 int trsvcid_int; 663 uint8_t adrfam; 664 665 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 666 667 trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid); 668 if (trsvcid_int < 0) { 669 SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid); 670 return -EINVAL; 671 } 672 673 pthread_mutex_lock(&ttransport->lock); 674 675 port = _spdk_nvmf_tcp_find_port(ttransport, trid); 676 if (port) { 677 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n", 678 trid->traddr, trid->trsvcid); 679 port->ref++; 680 pthread_mutex_unlock(&ttransport->lock); 681 return 0; 682 } 683 684 port = calloc(1, sizeof(*port)); 685 if (!port) { 686 SPDK_ERRLOG("Port allocation failed\n"); 687 free(port); 688 pthread_mutex_unlock(&ttransport->lock); 689 return -ENOMEM; 690 } 691 692 port->ref = 1; 693 694 if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) { 695 SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n", 696 trid->traddr, trid->trsvcid); 697 free(port); 698 pthread_mutex_unlock(&ttransport->lock); 699 return -ENOMEM; 700 } 701 702 port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int); 703 if (port->listen_sock == NULL) { 704 SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n", 705 trid->traddr, trsvcid_int, 706 spdk_strerror(errno), errno); 707 free(port); 708 pthread_mutex_unlock(&ttransport->lock); 709 return -errno; 710 } 711 712 if (spdk_sock_is_ipv4(port->listen_sock)) { 713 adrfam = SPDK_NVMF_ADRFAM_IPV4; 714 } else if (spdk_sock_is_ipv6(port->listen_sock)) { 715 adrfam = SPDK_NVMF_ADRFAM_IPV6; 716 } else { 717 SPDK_ERRLOG("Unhandled socket type\n"); 718 adrfam = 0; 719 } 720 721 if (adrfam != trid->adrfam) { 722 SPDK_ERRLOG("Socket address family mismatch\n"); 723 spdk_sock_close(&port->listen_sock); 724 free(port); 725 pthread_mutex_unlock(&ttransport->lock); 726 return -EINVAL; 727 } 728 729 SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n", 730 trid->traddr, trsvcid_int); 731 732 TAILQ_INSERT_TAIL(&ttransport->ports, port, link); 733 pthread_mutex_unlock(&ttransport->lock); 734 735 return 0; 736 } 737 738 static int 739 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport, 740 const struct spdk_nvme_transport_id *trid) 741 { 742 struct spdk_nvmf_tcp_transport *ttransport; 743 struct spdk_nvmf_tcp_port *port; 744 int rc; 745 746 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 747 748 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n", 749 trid->traddr, trid->trsvcid); 750 751 pthread_mutex_lock(&ttransport->lock); 752 port = _spdk_nvmf_tcp_find_port(ttransport, trid); 753 if (port) { 754 assert(port->ref > 0); 755 port->ref--; 756 if (port->ref == 0) { 757 TAILQ_REMOVE(&ttransport->ports, port, link); 758 spdk_sock_close(&port->listen_sock); 759 free(port); 760 } 761 rc = 0; 762 } else { 763 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n"); 764 rc = -ENOENT; 765 } 766 pthread_mutex_unlock(&ttransport->lock); 767 768 return rc; 769 } 770 771 static int 772 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair) 773 { 774 const int array_size = 32; 775 struct iovec iovs[array_size]; 776 int iovcnt = 0; 777 int bytes = 0; 778 int total_length = 0; 779 uint32_t mapped_length; 780 struct nvme_tcp_pdu *pdu; 781 int pdu_length; 782 TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list; 783 784 pdu = TAILQ_FIRST(&tqpair->send_queue); 785 786 if (pdu == NULL) { 787 return 0; 788 } 789 790 /* 791 * Build up a list of iovecs for the first few PDUs in the 792 * tqpair 's send_queue. 793 */ 794 while (pdu != NULL && ((array_size - iovcnt) >= 3)) { 795 iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt], 796 array_size - iovcnt, 797 pdu, 798 tqpair->host_hdgst_enable, 799 tqpair->host_ddgst_enable, 800 &mapped_length); 801 total_length += mapped_length; 802 pdu = TAILQ_NEXT(pdu, tailq); 803 } 804 805 spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt); 806 807 bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt); 808 if (bytes == -1) { 809 if (errno == EWOULDBLOCK || errno == EAGAIN) { 810 return 1; 811 } else { 812 SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n", 813 errno, spdk_strerror(errno)); 814 return -1; 815 } 816 } 817 818 spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0); 819 820 pdu = TAILQ_FIRST(&tqpair->send_queue); 821 822 /* 823 * Free any PDUs that were fully written. If a PDU was only 824 * partially written, update its writev_offset so that next 825 * time only the unwritten portion will be sent to writev(). 826 */ 827 TAILQ_INIT(&completed_pdus_list); 828 while (bytes > 0) { 829 pdu_length = pdu->hdr.common.plen - pdu->writev_offset; 830 if (bytes >= pdu_length) { 831 bytes -= pdu_length; 832 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 833 TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq); 834 pdu = TAILQ_FIRST(&tqpair->send_queue); 835 836 } else { 837 pdu->writev_offset += bytes; 838 bytes = 0; 839 } 840 } 841 842 while (!TAILQ_EMPTY(&completed_pdus_list)) { 843 pdu = TAILQ_FIRST(&completed_pdus_list); 844 TAILQ_REMOVE(&completed_pdus_list, pdu, tailq); 845 assert(pdu->cb_fn != NULL); 846 pdu->cb_fn(pdu->cb_arg); 847 spdk_nvmf_tcp_pdu_put(tqpair, pdu); 848 } 849 850 return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1; 851 } 852 853 static int 854 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair) 855 { 856 struct spdk_nvmf_tcp_qpair *tqpair = _tqpair; 857 int rc; 858 859 if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) { 860 rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair); 861 if (rc == 0 && tqpair->flush_poller != NULL) { 862 spdk_poller_unregister(&tqpair->flush_poller); 863 } else if (rc == 1 && tqpair->flush_poller == NULL) { 864 tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus, 865 tqpair, 50); 866 } 867 } else { 868 /* 869 * If the tqpair state is not RUNNING, then 870 * keep trying to flush PDUs until our list is 871 * empty - to make sure all data is sent before 872 * closing the connection. 873 */ 874 do { 875 rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair); 876 } while (rc == 1); 877 } 878 879 if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) { 880 /* 881 * If the poller has already started destruction of the tqpair, 882 * i.e. the socket read failed, then the connection state may already 883 * be EXITED. We don't want to set it back to EXITING in that case. 884 */ 885 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 886 } 887 888 return -1; 889 } 890 891 static void 892 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 893 struct nvme_tcp_pdu *pdu, 894 nvme_tcp_qpair_xfer_complete_cb cb_fn, 895 void *cb_arg) 896 { 897 int enable_digest; 898 int hlen; 899 uint32_t crc32c; 900 901 hlen = pdu->hdr.common.hlen; 902 enable_digest = 1; 903 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || 904 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) { 905 /* this PDU should be sent without digest */ 906 enable_digest = 0; 907 } 908 909 /* Header Digest */ 910 if (enable_digest && tqpair->host_hdgst_enable) { 911 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 912 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 913 } 914 915 /* Data Digest */ 916 if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) { 917 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 918 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 919 } 920 921 pdu->cb_fn = cb_fn; 922 pdu->cb_arg = cb_arg; 923 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 924 spdk_nvmf_tcp_qpair_flush_pdus(tqpair); 925 } 926 927 static int 928 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size) 929 { 930 int i; 931 struct spdk_nvmf_tcp_req *tcp_req; 932 struct spdk_nvmf_transport *transport = tqpair->qpair.transport; 933 uint32_t in_capsule_data_size; 934 935 in_capsule_data_size = transport->opts.in_capsule_data_size; 936 if (transport->opts.dif_insert_or_strip) { 937 in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size); 938 } 939 940 if (!tqpair->qpair.sq_head_max) { 941 tqpair->req = calloc(1, sizeof(*tqpair->req)); 942 if (!tqpair->req) { 943 SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair); 944 return -1; 945 } 946 947 if (in_capsule_data_size) { 948 tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000, 949 NULL, SPDK_ENV_LCORE_ID_ANY, 950 SPDK_MALLOC_DMA); 951 if (!tqpair->buf) { 952 SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair); 953 return -1; 954 } 955 } 956 957 tcp_req = tqpair->req; 958 tcp_req->ttag = 0; 959 tcp_req->req.qpair = &tqpair->qpair; 960 961 /* Set up memory to receive commands */ 962 if (tqpair->buf) { 963 tcp_req->buf = tqpair->buf; 964 } 965 966 /* Set the cmdn and rsp */ 967 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; 968 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; 969 970 /* Initialize request state to FREE */ 971 tcp_req->state = TCP_REQUEST_STATE_FREE; 972 TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 973 974 tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu)); 975 if (!tqpair->pdu) { 976 SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair); 977 return -1; 978 } 979 980 for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) { 981 TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq); 982 } 983 984 } else { 985 tqpair->reqs = calloc(size, sizeof(*tqpair->reqs)); 986 if (!tqpair->reqs) { 987 SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair); 988 return -1; 989 } 990 991 if (in_capsule_data_size) { 992 tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000, 993 NULL, SPDK_ENV_LCORE_ID_ANY, 994 SPDK_MALLOC_DMA); 995 if (!tqpair->bufs) { 996 SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair); 997 return -1; 998 } 999 } 1000 1001 for (i = 0; i < size; i++) { 1002 struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i]; 1003 1004 tcp_req->ttag = i + 1; 1005 tcp_req->req.qpair = &tqpair->qpair; 1006 1007 /* Set up memory to receive commands */ 1008 if (tqpair->bufs) { 1009 tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size)); 1010 } 1011 1012 /* Set the cmdn and rsp */ 1013 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; 1014 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; 1015 1016 /* Initialize request state to FREE */ 1017 tcp_req->state = TCP_REQUEST_STATE_FREE; 1018 TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 1019 } 1020 1021 tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool)); 1022 if (!tqpair->pdu_pool) { 1023 SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair); 1024 return -1; 1025 } 1026 1027 for (i = 0; i < size; i++) { 1028 TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq); 1029 } 1030 } 1031 1032 return 0; 1033 } 1034 1035 static int 1036 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair) 1037 { 1038 struct spdk_nvmf_tcp_qpair *tqpair; 1039 int i; 1040 1041 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1042 1043 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair); 1044 1045 TAILQ_INIT(&tqpair->send_queue); 1046 TAILQ_INIT(&tqpair->free_queue); 1047 TAILQ_INIT(&tqpair->queued_c2h_data_tcp_req); 1048 1049 /* Initialise request state queues of the qpair */ 1050 for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) { 1051 TAILQ_INIT(&tqpair->state_queue[i]); 1052 } 1053 1054 tqpair->host_hdgst_enable = true; 1055 tqpair->host_ddgst_enable = true; 1056 1057 return 0; 1058 } 1059 1060 static int 1061 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair) 1062 { 1063 1064 int rc; 1065 int buf_size; 1066 1067 /* set recv buffer size */ 1068 buf_size = 2 * 1024 * 1024; 1069 rc = spdk_sock_set_recvbuf(tqpair->sock, buf_size); 1070 if (rc != 0) { 1071 SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n"); 1072 return rc; 1073 } 1074 1075 /* set send buffer size */ 1076 rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size); 1077 if (rc != 0) { 1078 SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n"); 1079 return rc; 1080 } 1081 1082 /* set low water mark */ 1083 rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr)); 1084 if (rc != 0) { 1085 SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n"); 1086 return rc; 1087 } 1088 1089 return 0; 1090 } 1091 1092 static void 1093 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport, 1094 struct spdk_nvmf_tcp_port *port, 1095 struct spdk_sock *sock, new_qpair_fn cb_fn) 1096 { 1097 struct spdk_nvmf_tcp_qpair *tqpair; 1098 int rc; 1099 1100 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n", 1101 port->trid.traddr, port->trid.trsvcid); 1102 1103 if (transport->opts.sock_priority) { 1104 rc = spdk_sock_set_priority(sock, transport->opts.sock_priority); 1105 if (rc) { 1106 SPDK_ERRLOG("Failed to set the priority of the socket\n"); 1107 spdk_sock_close(&sock); 1108 return; 1109 } 1110 } 1111 1112 tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair)); 1113 if (tqpair == NULL) { 1114 SPDK_ERRLOG("Could not allocate new connection.\n"); 1115 spdk_sock_close(&sock); 1116 return; 1117 } 1118 1119 tqpair->sock = sock; 1120 tqpair->max_queue_depth = 1; 1121 tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; 1122 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth; 1123 tqpair->port = port; 1124 tqpair->qpair.transport = transport; 1125 1126 rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr, 1127 sizeof(tqpair->target_addr), &tqpair->target_port, 1128 tqpair->initiator_addr, sizeof(tqpair->initiator_addr), 1129 &tqpair->initiator_port); 1130 if (rc < 0) { 1131 SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair); 1132 spdk_nvmf_tcp_qpair_destroy(tqpair); 1133 return; 1134 } 1135 1136 cb_fn(&tqpair->qpair); 1137 } 1138 1139 static void 1140 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port, 1141 new_qpair_fn cb_fn) 1142 { 1143 struct spdk_sock *sock; 1144 int i; 1145 1146 for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) { 1147 sock = spdk_sock_accept(port->listen_sock); 1148 if (sock) { 1149 _spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn); 1150 } 1151 } 1152 } 1153 1154 static void 1155 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) 1156 { 1157 struct spdk_nvmf_tcp_transport *ttransport; 1158 struct spdk_nvmf_tcp_port *port; 1159 1160 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1161 1162 TAILQ_FOREACH(port, &ttransport->ports, link) { 1163 spdk_nvmf_tcp_port_accept(transport, port, cb_fn); 1164 } 1165 } 1166 1167 static void 1168 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport, 1169 struct spdk_nvme_transport_id *trid, 1170 struct spdk_nvmf_discovery_log_page_entry *entry) 1171 { 1172 entry->trtype = SPDK_NVMF_TRTYPE_TCP; 1173 entry->adrfam = trid->adrfam; 1174 entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED; 1175 1176 spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); 1177 spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); 1178 1179 entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE; 1180 } 1181 1182 static struct spdk_nvmf_transport_poll_group * 1183 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport) 1184 { 1185 struct spdk_nvmf_tcp_poll_group *tgroup; 1186 1187 tgroup = calloc(1, sizeof(*tgroup)); 1188 if (!tgroup) { 1189 return NULL; 1190 } 1191 1192 tgroup->sock_group = spdk_sock_group_create(&tgroup->group); 1193 if (!tgroup->sock_group) { 1194 goto cleanup; 1195 } 1196 1197 TAILQ_INIT(&tgroup->qpairs); 1198 TAILQ_INIT(&tgroup->pending_data_buf_queue); 1199 1200 return &tgroup->group; 1201 1202 cleanup: 1203 free(tgroup); 1204 return NULL; 1205 } 1206 1207 static struct spdk_nvmf_transport_poll_group * 1208 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) 1209 { 1210 struct spdk_nvmf_tcp_qpair *tqpair; 1211 struct spdk_sock_group *group = NULL; 1212 int rc; 1213 1214 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1215 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group); 1216 if (!rc && group != NULL) { 1217 return spdk_sock_group_get_ctx(group); 1218 } 1219 1220 return NULL; 1221 } 1222 1223 static void 1224 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1225 { 1226 struct spdk_nvmf_tcp_poll_group *tgroup; 1227 1228 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 1229 spdk_sock_group_close(&tgroup->sock_group); 1230 1231 if (!TAILQ_EMPTY(&tgroup->pending_data_buf_queue)) { 1232 SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n"); 1233 } 1234 1235 free(tgroup); 1236 } 1237 1238 static void 1239 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 1240 enum nvme_tcp_pdu_recv_state state) 1241 { 1242 if (tqpair->recv_state == state) { 1243 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 1244 tqpair, state); 1245 return; 1246 } 1247 1248 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state); 1249 tqpair->recv_state = state; 1250 1251 switch (state) { 1252 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1253 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1254 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1255 break; 1256 case NVME_TCP_PDU_RECV_STATE_ERROR: 1257 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1258 memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress)); 1259 break; 1260 default: 1261 SPDK_ERRLOG("The state(%d) is invalid\n", state); 1262 abort(); 1263 break; 1264 } 1265 } 1266 1267 static int 1268 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx) 1269 { 1270 struct spdk_nvmf_tcp_qpair *tqpair = ctx; 1271 1272 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 1273 1274 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, 1275 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); 1276 tqpair->state = NVME_TCP_QPAIR_STATE_EXITED; 1277 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconect the tqpair=%p\n", tqpair); 1278 spdk_poller_unregister(&tqpair->timeout_poller); 1279 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 1280 1281 return 0; 1282 } 1283 1284 static void 1285 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) 1286 { 1287 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; 1288 1289 if (!tqpair->timeout_poller) { 1290 tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair, 1291 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); 1292 } 1293 } 1294 1295 static void 1296 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1297 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1298 { 1299 struct nvme_tcp_pdu *rsp_pdu; 1300 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; 1301 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); 1302 uint32_t copy_len; 1303 1304 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1305 if (!rsp_pdu) { 1306 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1307 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1308 return; 1309 } 1310 1311 c2h_term_req = &rsp_pdu->hdr.term_req; 1312 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; 1313 c2h_term_req->common.hlen = c2h_term_req_hdr_len; 1314 1315 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1316 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1317 DSET32(&c2h_term_req->fei, error_offset); 1318 } 1319 1320 copy_len = pdu->hdr.common.hlen; 1321 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 1322 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 1323 } 1324 1325 /* Copy the error info into the buffer */ 1326 memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len); 1327 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len); 1328 1329 /* Contain the header of the wrong received pdu */ 1330 c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len; 1331 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1332 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair); 1333 } 1334 1335 static void 1336 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1337 struct spdk_nvmf_tcp_qpair *tqpair, 1338 struct nvme_tcp_pdu *pdu) 1339 { 1340 struct spdk_nvmf_tcp_req *tcp_req; 1341 1342 tcp_req = spdk_nvmf_tcp_req_get(tqpair); 1343 if (!tcp_req) { 1344 SPDK_ERRLOG("Cannot allocate tcp_req\n"); 1345 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1346 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1347 return; 1348 } 1349 1350 pdu->ctx = tcp_req; 1351 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 1352 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 1353 return; 1354 } 1355 1356 static void 1357 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1358 struct spdk_nvmf_tcp_qpair *tqpair, 1359 struct nvme_tcp_pdu *pdu) 1360 { 1361 struct spdk_nvmf_tcp_req *tcp_req; 1362 struct spdk_nvme_tcp_cmd *capsule_cmd; 1363 uint32_t error_offset = 0; 1364 enum spdk_nvme_tcp_term_req_fes fes; 1365 1366 capsule_cmd = &pdu->hdr.capsule_cmd; 1367 tcp_req = pdu->ctx; 1368 assert(tcp_req != NULL); 1369 if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) { 1370 SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n", 1371 SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo); 1372 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1373 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1374 goto err; 1375 } 1376 1377 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1378 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1379 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 1380 1381 return; 1382 err: 1383 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1384 } 1385 1386 static void 1387 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1388 struct spdk_nvmf_tcp_qpair *tqpair, 1389 struct nvme_tcp_pdu *pdu) 1390 { 1391 struct spdk_nvmf_tcp_req *tcp_req; 1392 uint32_t error_offset = 0; 1393 enum spdk_nvme_tcp_term_req_fes fes = 0; 1394 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1395 bool ttag_offset_error = false; 1396 1397 h2c_data = &pdu->hdr.h2c_data; 1398 1399 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n", 1400 tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag); 1401 1402 /* According to the information in the pdu to find the req */ 1403 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER], 1404 state_link) { 1405 if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) { 1406 break; 1407 } 1408 1409 if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) { 1410 ttag_offset_error = true; 1411 } 1412 } 1413 1414 if (!tcp_req) { 1415 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair); 1416 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 1417 if (!ttag_offset_error) { 1418 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid); 1419 } else { 1420 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); 1421 } 1422 goto err; 1423 } 1424 1425 if (tcp_req->next_expected_r2t_offset != h2c_data->datao) { 1426 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 1427 "tcp_req(%p), tqpair=%p, expected_r2t_offset=%u, but data offset =%u\n", 1428 tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao); 1429 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1430 goto err; 1431 } 1432 1433 if (h2c_data->datal > tqpair->maxh2cdata) { 1434 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datao=%u execeeds maxh2cdata size=%u\n", 1435 tcp_req, tqpair, h2c_data->datao, tqpair->maxh2cdata); 1436 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1437 goto err; 1438 } 1439 1440 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { 1441 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 1442 "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) execeeds requested length=%u\n", 1443 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); 1444 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1445 goto err; 1446 } 1447 1448 pdu->ctx = tcp_req; 1449 1450 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 1451 pdu->dif_ctx = &tcp_req->dif_ctx; 1452 } 1453 1454 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 1455 h2c_data->datao, h2c_data->datal); 1456 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1457 return; 1458 1459 err: 1460 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1461 } 1462 1463 static void 1464 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg) 1465 { 1466 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1467 nvmf_tcp_request_free(tcp_req); 1468 } 1469 1470 static void 1471 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, 1472 struct spdk_nvmf_tcp_qpair *tqpair) 1473 { 1474 struct nvme_tcp_pdu *rsp_pdu; 1475 struct spdk_nvme_tcp_rsp *capsule_resp; 1476 1477 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair); 1478 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1479 if (!rsp_pdu) { 1480 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1481 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1482 return; 1483 } 1484 1485 capsule_resp = &rsp_pdu->hdr.capsule_resp; 1486 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1487 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); 1488 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; 1489 if (tqpair->host_hdgst_enable) { 1490 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1491 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1492 } 1493 1494 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req); 1495 } 1496 1497 static void 1498 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) 1499 { 1500 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1501 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, 1502 struct spdk_nvmf_tcp_qpair, qpair); 1503 1504 assert(tqpair != NULL); 1505 assert(tcp_req->c2h_data_pdu_num > 0); 1506 tcp_req->c2h_data_pdu_num--; 1507 if (!tcp_req->c2h_data_pdu_num) { 1508 if (tqpair->qpair.transport->opts.c2h_success) { 1509 nvmf_tcp_request_free(tcp_req); 1510 } else { 1511 spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 1512 } 1513 } 1514 1515 tqpair->c2h_data_pdu_cnt--; 1516 spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair); 1517 } 1518 1519 static void 1520 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1521 struct spdk_nvmf_tcp_req *tcp_req) 1522 { 1523 struct nvme_tcp_pdu *rsp_pdu; 1524 struct spdk_nvme_tcp_r2t_hdr *r2t; 1525 1526 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1527 if (!rsp_pdu) { 1528 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1529 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1530 return; 1531 } 1532 1533 r2t = &rsp_pdu->hdr.r2t; 1534 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; 1535 r2t->common.plen = r2t->common.hlen = sizeof(*r2t); 1536 1537 if (tqpair->host_hdgst_enable) { 1538 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1539 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1540 } 1541 1542 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; 1543 r2t->ttag = tcp_req->ttag; 1544 r2t->r2to = tcp_req->next_expected_r2t_offset; 1545 r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata); 1546 tcp_req->r2tl_remain = r2t->r2tl; 1547 1548 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 1549 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", 1550 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); 1551 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL); 1552 } 1553 1554 static void 1555 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1556 struct spdk_nvmf_tcp_qpair *tqpair, 1557 struct nvme_tcp_pdu *pdu) 1558 { 1559 struct spdk_nvmf_tcp_req *tcp_req; 1560 1561 tcp_req = pdu->ctx; 1562 assert(tcp_req != NULL); 1563 1564 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 1565 1566 tcp_req->next_expected_r2t_offset += pdu->data_len; 1567 tcp_req->r2tl_remain -= pdu->data_len; 1568 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1569 1570 if (!tcp_req->r2tl_remain) { 1571 if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) { 1572 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1573 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 1574 } else { 1575 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair); 1576 spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 1577 } 1578 } 1579 } 1580 1581 static void 1582 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) 1583 { 1584 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, 1585 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); 1586 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1587 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1588 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n", 1589 DGET32(h2c_term_req->fei)); 1590 } 1591 } 1592 1593 static void 1594 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1595 struct nvme_tcp_pdu *pdu) 1596 { 1597 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1598 uint32_t error_offset = 0; 1599 enum spdk_nvme_tcp_term_req_fes fes; 1600 1601 1602 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1603 SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for h2c_term_req pdu=%p\n", pdu); 1604 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1605 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1606 goto end; 1607 } 1608 1609 /* set the data buffer */ 1610 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, 1611 h2c_term_req->common.plen - h2c_term_req->common.hlen); 1612 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1613 return; 1614 end: 1615 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1616 return; 1617 } 1618 1619 static void 1620 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1621 struct nvme_tcp_pdu *pdu) 1622 { 1623 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1624 1625 spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req); 1626 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1627 return; 1628 } 1629 1630 static void 1631 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1632 { 1633 int rc = 0; 1634 struct nvme_tcp_pdu *pdu; 1635 uint32_t crc32c, error_offset = 0; 1636 enum spdk_nvme_tcp_term_req_fes fes; 1637 struct spdk_nvmf_tcp_transport *ttransport; 1638 1639 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1640 pdu = &tqpair->pdu_in_progress; 1641 1642 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 1643 /* check data digest if need */ 1644 if (pdu->ddgst_enable) { 1645 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1646 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1647 if (rc == 0) { 1648 SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1649 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1650 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1651 return; 1652 1653 } 1654 } 1655 1656 ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 1657 switch (pdu->hdr.common.pdu_type) { 1658 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1659 spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu); 1660 break; 1661 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1662 spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu); 1663 break; 1664 1665 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1666 spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu); 1667 break; 1668 1669 default: 1670 /* The code should not go to here */ 1671 SPDK_ERRLOG("The code should not go to here\n"); 1672 break; 1673 } 1674 } 1675 1676 static void 1677 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg) 1678 { 1679 struct spdk_nvmf_tcp_qpair *tqpair = cb_arg; 1680 1681 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 1682 } 1683 1684 static void 1685 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport, 1686 struct spdk_nvmf_tcp_qpair *tqpair, 1687 struct nvme_tcp_pdu *pdu) 1688 { 1689 struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; 1690 struct nvme_tcp_pdu *rsp_pdu; 1691 struct spdk_nvme_tcp_ic_resp *ic_resp; 1692 uint32_t error_offset = 0; 1693 enum spdk_nvme_tcp_term_req_fes fes; 1694 1695 /* Only PFV 0 is defined currently */ 1696 if (ic_req->pfv != 0) { 1697 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); 1698 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1699 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv); 1700 goto end; 1701 } 1702 1703 /* MAXR2T is 0's based */ 1704 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); 1705 1706 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; 1707 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; 1708 1709 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); 1710 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); 1711 1712 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1713 if (!rsp_pdu) { 1714 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1715 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1716 return; 1717 } 1718 1719 ic_resp = &rsp_pdu->hdr.ic_resp; 1720 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; 1721 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); 1722 ic_resp->pfv = 0; 1723 ic_resp->cpda = tqpair->cpda; 1724 tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE, 1725 ttransport->transport.opts.io_unit_size); 1726 ic_resp->maxh2cdata = tqpair->maxh2cdata; 1727 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; 1728 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0; 1729 1730 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); 1731 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); 1732 1733 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair); 1734 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1735 return; 1736 end: 1737 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1738 return; 1739 } 1740 1741 static void 1742 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1743 { 1744 struct nvme_tcp_pdu *pdu; 1745 int rc; 1746 uint32_t crc32c, error_offset = 0; 1747 enum spdk_nvme_tcp_term_req_fes fes; 1748 struct spdk_nvmf_tcp_transport *ttransport; 1749 1750 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1751 pdu = &tqpair->pdu_in_progress; 1752 1753 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair, 1754 pdu->hdr.common.pdu_type); 1755 /* check header digest if needed */ 1756 if (pdu->has_hdgst) { 1757 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); 1758 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1759 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1760 if (rc == 0) { 1761 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1762 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1763 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1764 return; 1765 1766 } 1767 } 1768 1769 ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 1770 switch (pdu->hdr.common.pdu_type) { 1771 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: 1772 spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); 1773 break; 1774 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1775 spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); 1776 break; 1777 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1778 spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); 1779 break; 1780 1781 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1782 spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); 1783 break; 1784 1785 default: 1786 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type); 1787 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1788 error_offset = 1; 1789 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1790 break; 1791 } 1792 } 1793 1794 static void 1795 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1796 { 1797 struct nvme_tcp_pdu *pdu; 1798 uint32_t error_offset = 0; 1799 enum spdk_nvme_tcp_term_req_fes fes; 1800 uint8_t expected_hlen, pdo; 1801 bool plen_error = false, pdo_error = false; 1802 1803 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1804 pdu = &tqpair->pdu_in_progress; 1805 1806 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { 1807 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 1808 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); 1809 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1810 goto err; 1811 } 1812 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); 1813 if (pdu->hdr.common.plen != expected_hlen) { 1814 plen_error = true; 1815 } 1816 } else { 1817 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 1818 SPDK_ERRLOG("The TCP/IP connection is not negotitated\n"); 1819 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1820 goto err; 1821 } 1822 1823 switch (pdu->hdr.common.pdu_type) { 1824 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1825 expected_hlen = sizeof(struct spdk_nvme_tcp_cmd); 1826 pdo = pdu->hdr.common.pdo; 1827 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { 1828 pdo_error = true; 1829 break; 1830 } 1831 1832 if (pdu->hdr.common.plen < expected_hlen) { 1833 plen_error = true; 1834 } 1835 break; 1836 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1837 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); 1838 pdo = pdu->hdr.common.pdo; 1839 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { 1840 pdo_error = true; 1841 break; 1842 } 1843 if (pdu->hdr.common.plen < expected_hlen) { 1844 plen_error = true; 1845 } 1846 break; 1847 1848 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1849 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 1850 if ((pdu->hdr.common.plen <= expected_hlen) || 1851 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 1852 plen_error = true; 1853 } 1854 break; 1855 1856 default: 1857 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); 1858 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1859 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 1860 goto err; 1861 } 1862 } 1863 1864 if (pdu->hdr.common.hlen != expected_hlen) { 1865 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", 1866 pdu->hdr.common.pdu_type, 1867 expected_hlen, pdu->hdr.common.hlen, tqpair); 1868 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1869 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 1870 goto err; 1871 } else if (pdo_error) { 1872 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1873 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1874 } else if (plen_error) { 1875 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1876 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 1877 goto err; 1878 } else { 1879 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1880 nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable); 1881 return; 1882 } 1883 err: 1884 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1885 } 1886 1887 static int 1888 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset, 1889 int read_len) 1890 { 1891 int rc; 1892 1893 rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 1894 read_offset, read_len, pdu->dif_ctx); 1895 if (rc != 0) { 1896 SPDK_ERRLOG("DIF generate failed\n"); 1897 } 1898 1899 return rc; 1900 } 1901 1902 #define MAX_NVME_TCP_PDU_LOOP_COUNT 32 1903 1904 static int 1905 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) 1906 { 1907 int rc = 0; 1908 struct nvme_tcp_pdu *pdu; 1909 enum nvme_tcp_pdu_recv_state prev_state; 1910 uint32_t data_len, current_pdu_num = 0; 1911 1912 /* The loop here is to allow for several back-to-back state changes. */ 1913 do { 1914 prev_state = tqpair->recv_state; 1915 1916 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); 1917 1918 switch (tqpair->recv_state) { 1919 /* Wait for the common header */ 1920 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1921 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1922 pdu = &tqpair->pdu_in_progress; 1923 1924 rc = nvme_tcp_read_data(tqpair->sock, 1925 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1926 (void *)&pdu->hdr.common + pdu->ch_valid_bytes); 1927 if (rc < 0) { 1928 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair); 1929 return NVME_TCP_PDU_FATAL; 1930 } else if (rc > 0) { 1931 pdu->ch_valid_bytes += rc; 1932 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0); 1933 if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) { 1934 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1935 } 1936 } 1937 1938 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1939 return NVME_TCP_PDU_IN_PROGRESS; 1940 } 1941 1942 /* The command header of this PDU has now been read from the socket. */ 1943 spdk_nvmf_tcp_pdu_ch_handle(tqpair); 1944 break; 1945 /* Wait for the pdu specific header */ 1946 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1947 pdu = &tqpair->pdu_in_progress; 1948 rc = nvme_tcp_read_data(tqpair->sock, 1949 pdu->psh_len - pdu->psh_valid_bytes, 1950 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1951 if (rc < 0) { 1952 return NVME_TCP_PDU_FATAL; 1953 } else if (rc > 0) { 1954 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 1955 0, rc, 0, 0); 1956 pdu->psh_valid_bytes += rc; 1957 } 1958 if (pdu->psh_valid_bytes < pdu->psh_len) { 1959 return NVME_TCP_PDU_IN_PROGRESS; 1960 } 1961 1962 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1963 spdk_nvmf_tcp_pdu_psh_handle(tqpair); 1964 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) { 1965 current_pdu_num++; 1966 } 1967 break; 1968 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1969 pdu = &tqpair->pdu_in_progress; 1970 1971 /* check whether the data is valid, if not we just return */ 1972 if (!pdu->data_len) { 1973 return NVME_TCP_PDU_IN_PROGRESS; 1974 } 1975 1976 data_len = pdu->data_len; 1977 /* data digest */ 1978 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && 1979 tqpair->host_ddgst_enable)) { 1980 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1981 pdu->ddgst_enable = true; 1982 } 1983 1984 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1985 if (rc < 0) { 1986 return NVME_TCP_PDU_IN_PROGRESS; 1987 } 1988 pdu->readv_offset += rc; 1989 1990 if (spdk_unlikely(pdu->dif_ctx != NULL)) { 1991 rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc); 1992 if (rc != 0) { 1993 return NVME_TCP_PDU_FATAL; 1994 } 1995 } 1996 1997 if (pdu->readv_offset < data_len) { 1998 return NVME_TCP_PDU_IN_PROGRESS; 1999 } 2000 2001 /* All of this PDU has now been read from the socket. */ 2002 spdk_nvmf_tcp_pdu_payload_handle(tqpair); 2003 current_pdu_num++; 2004 break; 2005 case NVME_TCP_PDU_RECV_STATE_ERROR: 2006 pdu = &tqpair->pdu_in_progress; 2007 /* Check whether the connection is closed. Each time, we only read 1 byte every time */ 2008 rc = nvme_tcp_read_data(tqpair->sock, 1, (void *)&pdu->hdr.common); 2009 if (rc < 0) { 2010 return NVME_TCP_PDU_FATAL; 2011 } 2012 break; 2013 default: 2014 assert(0); 2015 SPDK_ERRLOG("code should not come to here"); 2016 break; 2017 } 2018 } while ((tqpair->recv_state != prev_state) && (current_pdu_num < MAX_NVME_TCP_PDU_LOOP_COUNT)); 2019 2020 return rc; 2021 } 2022 2023 static enum spdk_nvme_data_transfer 2024 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) { 2025 enum spdk_nvme_data_transfer xfer; 2026 struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd; 2027 struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; 2028 2029 /* Figure out data transfer direction */ 2030 if (cmd->opc == SPDK_NVME_OPC_FABRIC) 2031 { 2032 xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype); 2033 } else 2034 { 2035 xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 2036 2037 /* Some admin commands are special cases */ 2038 if ((tcp_req->req.qpair->qid == 0) && 2039 ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) || 2040 (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) { 2041 switch (cmd->cdw10 & 0xff) { 2042 case SPDK_NVME_FEAT_LBA_RANGE_TYPE: 2043 case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: 2044 case SPDK_NVME_FEAT_HOST_IDENTIFIER: 2045 break; 2046 default: 2047 xfer = SPDK_NVME_DATA_NONE; 2048 } 2049 } 2050 } 2051 2052 if (xfer == SPDK_NVME_DATA_NONE) 2053 { 2054 return xfer; 2055 } 2056 2057 /* Even for commands that may transfer data, they could have specified 0 length. 2058 * We want those to show up with xfer SPDK_NVME_DATA_NONE. 2059 */ 2060 switch (sgl->generic.type) 2061 { 2062 case SPDK_NVME_SGL_TYPE_DATA_BLOCK: 2063 case SPDK_NVME_SGL_TYPE_BIT_BUCKET: 2064 case SPDK_NVME_SGL_TYPE_SEGMENT: 2065 case SPDK_NVME_SGL_TYPE_LAST_SEGMENT: 2066 case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK: 2067 if (sgl->unkeyed.length == 0) { 2068 xfer = SPDK_NVME_DATA_NONE; 2069 } 2070 break; 2071 case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK: 2072 if (sgl->keyed.length == 0) { 2073 xfer = SPDK_NVME_DATA_NONE; 2074 } 2075 break; 2076 } 2077 2078 return xfer; 2079 } 2080 2081 static void 2082 spdk_nvmf_tcp_request_free_buffers(struct spdk_nvmf_tcp_req *tcp_req, 2083 struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport) 2084 { 2085 for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) { 2086 assert(tcp_req->buffers[i] != NULL); 2087 if (group->buf_cache_count < group->buf_cache_size) { 2088 STAILQ_INSERT_HEAD(&group->buf_cache, 2089 (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link); 2090 group->buf_cache_count++; 2091 } else { 2092 spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]); 2093 } 2094 tcp_req->req.iov[i].iov_base = NULL; 2095 tcp_req->buffers[i] = NULL; 2096 tcp_req->req.iov[i].iov_len = 0; 2097 } 2098 tcp_req->data_from_pool = false; 2099 } 2100 2101 static int 2102 spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport, 2103 struct spdk_nvmf_tcp_req *tcp_req, uint32_t length) 2104 { 2105 void *buf = NULL; 2106 uint32_t i = 0; 2107 struct spdk_nvmf_tcp_qpair *tqpair; 2108 struct spdk_nvmf_transport_poll_group *group; 2109 2110 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2111 group = &tqpair->group->group; 2112 2113 tcp_req->req.iovcnt = 0; 2114 while (length) { 2115 if (!(STAILQ_EMPTY(&group->buf_cache))) { 2116 group->buf_cache_count--; 2117 buf = STAILQ_FIRST(&group->buf_cache); 2118 STAILQ_REMOVE_HEAD(&group->buf_cache, link); 2119 } else { 2120 buf = spdk_mempool_get(ttransport->transport.data_buf_pool); 2121 if (!buf) { 2122 goto nomem; 2123 } 2124 } 2125 2126 tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) & 2127 ~NVMF_DATA_BUFFER_MASK); 2128 tcp_req->req.iov[i].iov_len = spdk_min(length, ttransport->transport.opts.io_unit_size); 2129 tcp_req->req.iovcnt++; 2130 tcp_req->buffers[i] = buf; 2131 length -= tcp_req->req.iov[i].iov_len; 2132 i++; 2133 } 2134 2135 assert(tcp_req->req.iovcnt <= SPDK_NVMF_MAX_SGL_ENTRIES); 2136 tcp_req->data_from_pool = true; 2137 return 0; 2138 2139 nomem: 2140 spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport); 2141 tcp_req->req.iovcnt = 0; 2142 return -ENOMEM; 2143 } 2144 2145 static int 2146 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_transport *ttransport, 2147 struct spdk_nvmf_tcp_req *tcp_req) 2148 { 2149 struct spdk_nvme_cmd *cmd; 2150 struct spdk_nvme_cpl *rsp; 2151 struct spdk_nvme_sgl_descriptor *sgl; 2152 uint32_t length; 2153 2154 cmd = &tcp_req->req.cmd->nvme_cmd; 2155 rsp = &tcp_req->req.rsp->nvme_cpl; 2156 sgl = &cmd->dptr.sgl1; 2157 2158 length = sgl->unkeyed.length; 2159 2160 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && 2161 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { 2162 if (length > ttransport->transport.opts.max_io_size) { 2163 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 2164 length, ttransport->transport.opts.max_io_size); 2165 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 2166 return -1; 2167 } 2168 2169 /* fill request length and populate iovs */ 2170 tcp_req->req.length = length; 2171 2172 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length); 2173 2174 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2175 length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx); 2176 tcp_req->elba_length = length; 2177 } 2178 2179 if (spdk_nvmf_tcp_req_fill_iovs(ttransport, tcp_req, length) < 0) { 2180 /* No available buffers. Queue this request up. */ 2181 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n", 2182 tcp_req); 2183 return 0; 2184 } 2185 2186 /* backward compatible */ 2187 tcp_req->req.data = tcp_req->req.iov[0].iov_base; 2188 2189 2190 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n", 2191 tcp_req, 2192 tcp_req->req.iovcnt, tcp_req->req.data); 2193 2194 return 0; 2195 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 2196 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 2197 uint64_t offset = sgl->address; 2198 uint32_t max_len = ttransport->transport.opts.in_capsule_data_size; 2199 2200 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 2201 offset, length); 2202 2203 if (offset > max_len) { 2204 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", 2205 offset, max_len); 2206 rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; 2207 return -1; 2208 } 2209 max_len -= (uint32_t)offset; 2210 2211 if (length > max_len) { 2212 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 2213 length, max_len); 2214 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 2215 return -1; 2216 } 2217 2218 tcp_req->req.data = tcp_req->buf + offset; 2219 tcp_req->data_from_pool = false; 2220 tcp_req->req.length = length; 2221 2222 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2223 length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx); 2224 tcp_req->elba_length = length; 2225 } 2226 2227 tcp_req->req.iov[0].iov_base = tcp_req->req.data; 2228 tcp_req->req.iov[0].iov_len = length; 2229 tcp_req->req.iovcnt = 1; 2230 2231 return 0; 2232 } 2233 2234 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 2235 sgl->generic.type, sgl->generic.subtype); 2236 rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; 2237 return -1; 2238 } 2239 2240 static int 2241 nvmf_tcp_pdu_verify_dif(struct nvme_tcp_pdu *pdu, 2242 const struct spdk_dif_ctx *dif_ctx) 2243 { 2244 struct spdk_dif_error err_blk = {}; 2245 int rc; 2246 2247 rc = spdk_dif_verify_stream(pdu->data_iov, pdu->data_iovcnt, 2248 0, pdu->data_len, pdu->dif_ctx, &err_blk); 2249 if (rc != 0) { 2250 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2251 err_blk.err_type, err_blk.err_offset); 2252 } 2253 2254 return rc; 2255 } 2256 2257 static void 2258 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2259 struct spdk_nvmf_tcp_req *tcp_req) 2260 { 2261 struct nvme_tcp_pdu *rsp_pdu; 2262 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 2263 uint32_t plen, pdo, alignment; 2264 int rc; 2265 2266 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 2267 2268 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 2269 assert(rsp_pdu != NULL); 2270 2271 c2h_data = &rsp_pdu->hdr.c2h_data; 2272 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; 2273 plen = c2h_data->common.hlen = sizeof(*c2h_data); 2274 2275 if (tqpair->host_hdgst_enable) { 2276 plen += SPDK_NVME_TCP_DIGEST_LEN; 2277 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2278 } 2279 2280 /* set the psh */ 2281 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2282 c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE, 2283 tcp_req->req.length - tcp_req->c2h_data_offset); 2284 c2h_data->datao = tcp_req->c2h_data_offset; 2285 2286 /* set the padding */ 2287 rsp_pdu->padding_len = 0; 2288 pdo = plen; 2289 if (tqpair->cpda) { 2290 alignment = (tqpair->cpda + 1) << 2; 2291 if (alignment > plen) { 2292 rsp_pdu->padding_len = alignment - plen; 2293 pdo = plen = alignment; 2294 } 2295 } 2296 2297 c2h_data->common.pdo = pdo; 2298 plen += c2h_data->datal; 2299 if (tqpair->host_ddgst_enable) { 2300 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 2301 plen += SPDK_NVME_TCP_DIGEST_LEN; 2302 } 2303 2304 c2h_data->common.plen = plen; 2305 2306 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2307 rsp_pdu->dif_ctx = &tcp_req->dif_ctx; 2308 } 2309 2310 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2311 c2h_data->datao, c2h_data->datal); 2312 2313 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2314 rc = nvmf_tcp_pdu_verify_dif(rsp_pdu, rsp_pdu->dif_ctx); 2315 if (rc != 0) { 2316 /* Data digest error detected by the NVMe/TCP target is treated as non-fatal 2317 * transport error because the cause will be outside the NVMe/TCP target. 2318 * 2319 * On the other hand, treat DIF check error as fatal transport error here 2320 * here because the error is caused by the target itself. Fatal NVMe/TCP 2321 * transport error is handled by terminating the connection. 2322 */ 2323 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 2324 return; 2325 } 2326 } 2327 2328 tcp_req->c2h_data_offset += c2h_data->datal; 2329 if (tcp_req->c2h_data_offset == tcp_req->req.length) { 2330 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair); 2331 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 2332 if (tqpair->qpair.transport->opts.c2h_success) { 2333 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 2334 } 2335 TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link); 2336 } 2337 2338 tqpair->c2h_data_pdu_cnt += 1; 2339 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req); 2340 } 2341 2342 static int 2343 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req) 2344 { 2345 return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) / 2346 NVMF_TCP_PDU_MAX_C2H_DATA_SIZE; 2347 } 2348 2349 static void 2350 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair) 2351 { 2352 struct spdk_nvmf_tcp_req *tcp_req; 2353 2354 while (!TAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) && 2355 (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) { 2356 tcp_req = TAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req); 2357 spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2358 } 2359 } 2360 2361 static void 2362 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req, 2363 struct spdk_nvmf_tcp_qpair *tqpair) 2364 { 2365 tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req); 2366 2367 assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM); 2368 2369 TAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link); 2370 spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair); 2371 } 2372 2373 static int 2374 request_transfer_out(struct spdk_nvmf_request *req) 2375 { 2376 struct spdk_nvmf_tcp_req *tcp_req; 2377 struct spdk_nvmf_qpair *qpair; 2378 struct spdk_nvmf_tcp_qpair *tqpair; 2379 struct spdk_nvme_cpl *rsp; 2380 2381 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 2382 2383 qpair = req->qpair; 2384 rsp = &req->rsp->nvme_cpl; 2385 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2386 2387 /* Advance our sq_head pointer */ 2388 if (qpair->sq_head == qpair->sq_head_max) { 2389 qpair->sq_head = 0; 2390 } else { 2391 qpair->sq_head++; 2392 } 2393 rsp->sqhd = qpair->sq_head; 2394 2395 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2396 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 2397 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && 2398 req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 2399 spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair); 2400 } else { 2401 spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2402 } 2403 2404 return 0; 2405 } 2406 2407 static void 2408 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair, 2409 struct spdk_nvmf_tcp_req *tcp_req) 2410 { 2411 struct nvme_tcp_pdu *pdu; 2412 2413 if (tcp_req->data_from_pool) { 2414 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2415 tcp_req->next_expected_r2t_offset = 0; 2416 spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2417 } else { 2418 pdu = &tqpair->pdu_in_progress; 2419 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, 2420 tqpair); 2421 /* No need to send r2t, contained in the capsuled data */ 2422 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2423 0, tcp_req->req.length); 2424 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2425 } 2426 } 2427 2428 static void 2429 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair, 2430 struct spdk_nvmf_tcp_req *tcp_req) 2431 { 2432 struct nvme_tcp_pdu *pdu; 2433 uint32_t plen = 0; 2434 2435 pdu = &tqpair->pdu_in_progress; 2436 plen = pdu->hdr.common.hlen; 2437 2438 if (tqpair->host_hdgst_enable) { 2439 plen += SPDK_NVME_TCP_DIGEST_LEN; 2440 } 2441 2442 if (pdu->hdr.common.plen != plen) { 2443 tcp_req->has_incapsule_data = true; 2444 } 2445 } 2446 2447 static bool 2448 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 2449 struct spdk_nvmf_tcp_req *tcp_req) 2450 { 2451 struct spdk_nvmf_tcp_qpair *tqpair; 2452 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; 2453 int rc; 2454 enum spdk_nvmf_tcp_req_state prev_state; 2455 bool progress = false; 2456 struct spdk_nvmf_transport_poll_group *group; 2457 2458 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2459 group = &tqpair->group->group; 2460 assert(tcp_req->state != TCP_REQUEST_STATE_FREE); 2461 2462 /* The loop here is to allow for several back-to-back state changes. */ 2463 do { 2464 prev_state = tcp_req->state; 2465 2466 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, 2467 tqpair); 2468 2469 switch (tcp_req->state) { 2470 case TCP_REQUEST_STATE_FREE: 2471 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW 2472 * to escape this state. */ 2473 break; 2474 case TCP_REQUEST_STATE_NEW: 2475 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0); 2476 2477 /* copy the cmd from the receive pdu */ 2478 tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe; 2479 2480 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->dif_ctx))) { 2481 tcp_req->dif_insert_or_strip = true; 2482 tqpair->pdu_in_progress.dif_ctx = &tcp_req->dif_ctx; 2483 } 2484 2485 /* The next state transition depends on the data transfer needs of this request. */ 2486 tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req); 2487 2488 /* If no data to transfer, ready to execute. */ 2489 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { 2490 /* Reset the tqpair receving pdu state */ 2491 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2492 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2493 break; 2494 } 2495 2496 spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req); 2497 2498 if (!tcp_req->has_incapsule_data) { 2499 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2500 } 2501 2502 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); 2503 TAILQ_INSERT_TAIL(&tqpair->group->pending_data_buf_queue, tcp_req, link); 2504 break; 2505 case TCP_REQUEST_STATE_NEED_BUFFER: 2506 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0); 2507 2508 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); 2509 2510 if (!tcp_req->has_incapsule_data && 2511 (tcp_req != TAILQ_FIRST(&tqpair->group->pending_data_buf_queue))) { 2512 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 2513 "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n", 2514 tcp_req, tqpair); 2515 /* This request needs to wait in line to obtain a buffer */ 2516 break; 2517 } 2518 2519 /* Try to get a data buffer */ 2520 rc = spdk_nvmf_tcp_req_parse_sgl(ttransport, tcp_req); 2521 if (rc < 0) { 2522 TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link); 2523 rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 2524 /* Reset the tqpair receving pdu state */ 2525 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 2526 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2527 break; 2528 } 2529 2530 if (!tcp_req->req.data) { 2531 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)", 2532 tcp_req, tqpair); 2533 /* No buffers available. */ 2534 break; 2535 } 2536 2537 TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link); 2538 2539 /* If data is transferring from host to controller, we need to do a transfer from the host. */ 2540 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2541 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2542 spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req); 2543 break; 2544 } 2545 2546 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2547 break; 2548 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 2549 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, 2550 (uintptr_t)tcp_req, 0); 2551 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE 2552 * to escape this state. */ 2553 break; 2554 case TCP_REQUEST_STATE_READY_TO_EXECUTE: 2555 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0); 2556 2557 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2558 assert(tcp_req->elba_length >= tcp_req->req.length); 2559 tcp_req->orig_length = tcp_req->req.length; 2560 tcp_req->req.length = tcp_req->elba_length; 2561 } 2562 2563 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING); 2564 spdk_nvmf_request_exec(&tcp_req->req); 2565 break; 2566 case TCP_REQUEST_STATE_EXECUTING: 2567 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0); 2568 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 2569 * to escape this state. */ 2570 break; 2571 case TCP_REQUEST_STATE_EXECUTED: 2572 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0); 2573 2574 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2575 tcp_req->req.length = tcp_req->orig_length; 2576 } 2577 2578 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2579 break; 2580 case TCP_REQUEST_STATE_READY_TO_COMPLETE: 2581 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0); 2582 rc = request_transfer_out(&tcp_req->req); 2583 assert(rc == 0); /* No good way to handle this currently */ 2584 break; 2585 case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: 2586 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, 2587 (uintptr_t)tcp_req, 2588 0); 2589 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 2590 * to escape this state. */ 2591 break; 2592 case TCP_REQUEST_STATE_COMPLETED: 2593 spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0); 2594 if (tcp_req->data_from_pool) { 2595 spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport); 2596 } 2597 tcp_req->req.length = 0; 2598 tcp_req->req.iovcnt = 0; 2599 tcp_req->req.data = NULL; 2600 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE); 2601 break; 2602 case TCP_REQUEST_NUM_STATES: 2603 default: 2604 assert(0); 2605 break; 2606 } 2607 2608 if (tcp_req->state != prev_state) { 2609 progress = true; 2610 } 2611 } while (tcp_req->state != prev_state); 2612 2613 return progress; 2614 } 2615 static void 2616 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) 2617 { 2618 struct spdk_nvmf_tcp_qpair *tqpair = arg; 2619 int rc; 2620 2621 assert(tqpair != NULL); 2622 rc = spdk_nvmf_tcp_sock_process(tqpair); 2623 2624 /* check the following two factors: 2625 * rc: The socket is closed 2626 * State of tqpair: The tqpair is in EXITING state due to internal error 2627 */ 2628 if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) { 2629 tqpair->state = NVME_TCP_QPAIR_STATE_EXITED; 2630 spdk_nvmf_tcp_qpair_flush_pdus(tqpair); 2631 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconect the tqpair=%p\n", tqpair); 2632 spdk_poller_unregister(&tqpair->timeout_poller); 2633 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 2634 } 2635 } 2636 2637 static int 2638 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2639 struct spdk_nvmf_qpair *qpair) 2640 { 2641 struct spdk_nvmf_tcp_poll_group *tgroup; 2642 struct spdk_nvmf_tcp_qpair *tqpair; 2643 int rc; 2644 2645 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2646 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2647 2648 rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock, 2649 spdk_nvmf_tcp_sock_cb, tqpair); 2650 if (rc != 0) { 2651 SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n", 2652 spdk_strerror(errno), errno); 2653 spdk_nvmf_tcp_qpair_destroy(tqpair); 2654 return -1; 2655 } 2656 2657 rc = spdk_nvmf_tcp_qpair_sock_init(tqpair); 2658 if (rc != 0) { 2659 SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair); 2660 spdk_nvmf_tcp_qpair_destroy(tqpair); 2661 return -1; 2662 } 2663 2664 rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair); 2665 if (rc < 0) { 2666 SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair); 2667 spdk_nvmf_tcp_qpair_destroy(tqpair); 2668 return -1; 2669 } 2670 2671 rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1); 2672 if (rc < 0) { 2673 SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair); 2674 spdk_nvmf_tcp_qpair_destroy(tqpair); 2675 return -1; 2676 } 2677 2678 tqpair->group = tgroup; 2679 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 2680 TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link); 2681 2682 return 0; 2683 } 2684 2685 static int 2686 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 2687 struct spdk_nvmf_qpair *qpair) 2688 { 2689 struct spdk_nvmf_tcp_poll_group *tgroup; 2690 struct spdk_nvmf_tcp_qpair *tqpair; 2691 int rc; 2692 2693 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2694 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2695 2696 assert(tqpair->group == tgroup); 2697 2698 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup); 2699 TAILQ_REMOVE(&tgroup->qpairs, tqpair, link); 2700 rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock); 2701 if (rc != 0) { 2702 SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n", 2703 spdk_strerror(errno), errno); 2704 } 2705 2706 return rc; 2707 } 2708 2709 static int 2710 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req) 2711 { 2712 struct spdk_nvmf_tcp_transport *ttransport; 2713 struct spdk_nvmf_tcp_req *tcp_req; 2714 2715 ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport); 2716 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2717 2718 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 2719 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 2720 2721 return 0; 2722 } 2723 2724 static void 2725 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair) 2726 { 2727 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 2728 2729 spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair)); 2730 } 2731 2732 static int 2733 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 2734 { 2735 struct spdk_nvmf_tcp_poll_group *tgroup; 2736 int rc; 2737 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 2738 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport, 2739 struct spdk_nvmf_tcp_transport, transport); 2740 2741 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2742 2743 if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) { 2744 return 0; 2745 } 2746 2747 TAILQ_FOREACH_SAFE(tcp_req, &tgroup->pending_data_buf_queue, link, req_tmp) { 2748 if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) { 2749 break; 2750 } 2751 } 2752 2753 rc = spdk_sock_group_poll(tgroup->sock_group); 2754 if (rc < 0) { 2755 SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group); 2756 } 2757 2758 return rc; 2759 } 2760 2761 static int 2762 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair, 2763 struct spdk_nvme_transport_id *trid, bool peer) 2764 { 2765 struct spdk_nvmf_tcp_qpair *tqpair; 2766 uint16_t port; 2767 2768 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2769 trid->trtype = SPDK_NVME_TRANSPORT_TCP; 2770 2771 if (peer) { 2772 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr); 2773 port = tqpair->initiator_port; 2774 } else { 2775 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr); 2776 port = tqpair->target_port; 2777 } 2778 2779 if (spdk_sock_is_ipv4(tqpair->sock)) { 2780 trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; 2781 } else if (spdk_sock_is_ipv4(tqpair->sock)) { 2782 trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; 2783 } else { 2784 return -1; 2785 } 2786 2787 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port); 2788 return 0; 2789 } 2790 2791 static int 2792 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 2793 struct spdk_nvme_transport_id *trid) 2794 { 2795 return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0); 2796 } 2797 2798 static int 2799 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 2800 struct spdk_nvme_transport_id *trid) 2801 { 2802 return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1); 2803 } 2804 2805 static int 2806 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 2807 struct spdk_nvme_transport_id *trid) 2808 { 2809 return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0); 2810 } 2811 2812 static int 2813 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair) 2814 { 2815 struct spdk_nvmf_tcp_qpair *tqpair; 2816 int rc; 2817 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2818 2819 rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max); 2820 if (!rc) { 2821 tqpair->max_queue_depth += tqpair->qpair.sq_head_max; 2822 tqpair->free_pdu_num += tqpair->qpair.sq_head_max; 2823 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max; 2824 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n", 2825 tqpair->max_queue_depth, tqpair); 2826 } 2827 2828 return rc; 2829 2830 } 2831 2832 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128 2833 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128 2834 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128 2835 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 2836 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072 2837 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072 2838 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511 2839 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32 2840 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true 2841 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false 2842 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0 2843 2844 static void 2845 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts) 2846 { 2847 opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH; 2848 opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR; 2849 opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE; 2850 opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE; 2851 opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE; 2852 opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH; 2853 opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS; 2854 opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE; 2855 opts->c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION; 2856 opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP; 2857 opts->sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY; 2858 } 2859 2860 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = { 2861 .type = SPDK_NVME_TRANSPORT_TCP, 2862 .opts_init = spdk_nvmf_tcp_opts_init, 2863 .create = spdk_nvmf_tcp_create, 2864 .destroy = spdk_nvmf_tcp_destroy, 2865 2866 .listen = spdk_nvmf_tcp_listen, 2867 .stop_listen = spdk_nvmf_tcp_stop_listen, 2868 .accept = spdk_nvmf_tcp_accept, 2869 2870 .listener_discover = spdk_nvmf_tcp_discover, 2871 2872 .poll_group_create = spdk_nvmf_tcp_poll_group_create, 2873 .get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group, 2874 .poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy, 2875 .poll_group_add = spdk_nvmf_tcp_poll_group_add, 2876 .poll_group_remove = spdk_nvmf_tcp_poll_group_remove, 2877 .poll_group_poll = spdk_nvmf_tcp_poll_group_poll, 2878 2879 .req_free = spdk_nvmf_tcp_req_free, 2880 .req_complete = spdk_nvmf_tcp_req_complete, 2881 2882 .qpair_fini = spdk_nvmf_tcp_close_qpair, 2883 .qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid, 2884 .qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid, 2885 .qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid, 2886 .qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size, 2887 }; 2888 2889 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP) 2890