1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 #include "spdk/crc32.h" 36 #include "spdk/endian.h" 37 #include "spdk/assert.h" 38 #include "spdk/thread.h" 39 #include "spdk/nvmf.h" 40 #include "spdk/nvmf_spec.h" 41 #include "spdk/sock.h" 42 #include "spdk/string.h" 43 #include "spdk/trace.h" 44 #include "spdk/util.h" 45 46 #include "nvmf_internal.h" 47 #include "transport.h" 48 49 #include "spdk_internal/log.h" 50 #include "spdk_internal/nvme_tcp.h" 51 52 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16 53 54 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE 131072 55 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE 131072 56 #define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM 64 /* Maximal c2h_data pdu number for ecah tqpair */ 57 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6 58 59 /* spdk nvmf related structure */ 60 enum spdk_nvmf_tcp_req_state { 61 62 /* The request is not currently in use */ 63 TCP_REQUEST_STATE_FREE = 0, 64 65 /* Initial state when request first received */ 66 TCP_REQUEST_STATE_NEW, 67 68 /* The request is queued until a data buffer is available. */ 69 TCP_REQUEST_STATE_NEED_BUFFER, 70 71 /* The request is currently transferring data from the host to the controller. */ 72 TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 73 74 /* The request is ready to execute at the block device */ 75 TCP_REQUEST_STATE_READY_TO_EXECUTE, 76 77 /* The request is currently executing at the block device */ 78 TCP_REQUEST_STATE_EXECUTING, 79 80 /* The request finished executing at the block device */ 81 TCP_REQUEST_STATE_EXECUTED, 82 83 /* The request is ready to send a completion */ 84 TCP_REQUEST_STATE_READY_TO_COMPLETE, 85 86 /* The request is currently transferring final pdus from the controller to the host. */ 87 TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 88 89 /* The request completed and can be marked free. */ 90 TCP_REQUEST_STATE_COMPLETED, 91 92 /* Terminator */ 93 TCP_REQUEST_NUM_STATES, 94 }; 95 96 static const char *spdk_nvmf_tcp_term_req_fes_str[] = { 97 "Invalid PDU Header Field", 98 "PDU Sequence Error", 99 "Header Digiest Error", 100 "Data Transfer Out of Range", 101 "R2T Limit Exceeded", 102 "Unsupported parameter", 103 }; 104 105 #define OBJECT_NVMF_TCP_IO 0x80 106 107 #define TRACE_GROUP_NVMF_TCP 0x5 108 #define TRACE_TCP_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0) 109 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1) 110 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2) 111 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3) 112 #define TRACE_TCP_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4) 113 #define TRACE_TCP_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5) 114 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6) 115 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7) 116 #define TRACE_TCP_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8) 117 #define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9) 118 #define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA) 119 #define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB) 120 121 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP) 122 { 123 spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r'); 124 spdk_trace_register_description("TCP_REQ_NEW", 125 TRACE_TCP_REQUEST_STATE_NEW, 126 OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, ""); 127 spdk_trace_register_description("TCP_REQ_NEED_BUFFER", 128 TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 129 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 130 spdk_trace_register_description("TCP_REQ_TX_H_TO_C", 131 TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 132 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 133 spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE", 134 TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 135 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 136 spdk_trace_register_description("TCP_REQ_EXECUTING", 137 TRACE_TCP_REQUEST_STATE_EXECUTING, 138 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 139 spdk_trace_register_description("TCP_REQ_EXECUTED", 140 TRACE_TCP_REQUEST_STATE_EXECUTED, 141 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 142 spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE", 143 TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 144 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 145 spdk_trace_register_description("TCP_REQ_TRANSFER_C2H", 146 TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 147 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 148 spdk_trace_register_description("TCP_REQ_COMPLETED", 149 TRACE_TCP_REQUEST_STATE_COMPLETED, 150 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 151 spdk_trace_register_description("TCP_WRITE_START", 152 TRACE_TCP_FLUSH_WRITEBUF_START, 153 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 154 spdk_trace_register_description("TCP_WRITE_DONE", 155 TRACE_TCP_FLUSH_WRITEBUF_DONE, 156 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 157 spdk_trace_register_description("TCP_READ_DONE", 158 TRACE_TCP_READ_FROM_SOCKET_DONE, 159 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 160 } 161 162 struct spdk_nvmf_tcp_req { 163 struct spdk_nvmf_request req; 164 struct spdk_nvme_cpl rsp; 165 struct spdk_nvme_cmd cmd; 166 167 /* In-capsule data buffer */ 168 uint8_t *buf; 169 170 bool data_from_pool; 171 bool has_incapsule_data; 172 173 /* transfer_tag */ 174 uint16_t ttag; 175 176 enum spdk_nvmf_tcp_req_state state; 177 178 void *buffers[SPDK_NVMF_MAX_SGL_ENTRIES]; 179 180 /* 181 * next_expected_r2t_offset is used when we receive the h2c_data PDU. 182 */ 183 uint32_t next_expected_r2t_offset; 184 uint32_t r2tl_remain; 185 186 /* 187 * c2h_data_offset is used when we send the c2h_data PDU. 188 */ 189 uint32_t c2h_data_offset; 190 uint32_t c2h_data_pdu_num; 191 192 struct spdk_dif_ctx dif_ctx; 193 bool dif_insert_or_strip; 194 uint32_t elba_length; 195 uint32_t orig_length; 196 197 TAILQ_ENTRY(spdk_nvmf_tcp_req) link; 198 TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link; 199 }; 200 201 struct spdk_nvmf_tcp_qpair { 202 struct spdk_nvmf_qpair qpair; 203 struct spdk_nvmf_tcp_poll_group *group; 204 struct spdk_nvmf_tcp_port *port; 205 struct spdk_sock *sock; 206 struct spdk_poller *flush_poller; 207 208 enum nvme_tcp_pdu_recv_state recv_state; 209 enum nvme_tcp_qpair_state state; 210 211 struct nvme_tcp_pdu pdu_in_progress; 212 213 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 214 TAILQ_HEAD(, nvme_tcp_pdu) free_queue; 215 216 struct nvme_tcp_pdu *pdu; 217 struct nvme_tcp_pdu *pdu_pool; 218 uint16_t free_pdu_num; 219 220 /* Queues to track the requests in all states */ 221 TAILQ_HEAD(, spdk_nvmf_tcp_req) state_queue[TCP_REQUEST_NUM_STATES]; 222 /* Number of requests in each state */ 223 int32_t state_cntr[TCP_REQUEST_NUM_STATES]; 224 225 TAILQ_HEAD(, spdk_nvmf_tcp_req) queued_c2h_data_tcp_req; 226 227 uint8_t cpda; 228 229 /* Array of size "max_queue_depth * InCapsuleDataSize" containing 230 * buffers to be used for in capsule data. 231 */ 232 void *buf; 233 void *bufs; 234 struct spdk_nvmf_tcp_req *req; 235 struct spdk_nvmf_tcp_req *reqs; 236 237 bool host_hdgst_enable; 238 bool host_ddgst_enable; 239 240 241 /* The maximum number of I/O outstanding on this connection at one time */ 242 uint16_t max_queue_depth; 243 244 245 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 246 uint32_t maxh2cdata; 247 248 uint32_t c2h_data_pdu_cnt; 249 250 /* IP address */ 251 char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 252 char target_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 253 254 /* IP port */ 255 uint16_t initiator_port; 256 uint16_t target_port; 257 258 /* Timer used to destroy qpair after detecting transport error issue if initiator does 259 * not close the connection. 260 */ 261 struct spdk_poller *timeout_poller; 262 263 TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; 264 }; 265 266 struct spdk_nvmf_tcp_poll_group { 267 struct spdk_nvmf_transport_poll_group group; 268 struct spdk_sock_group *sock_group; 269 270 /* Requests that are waiting to obtain a data buffer */ 271 TAILQ_HEAD(, spdk_nvmf_tcp_req) pending_data_buf_queue; 272 273 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; 274 }; 275 276 struct spdk_nvmf_tcp_port { 277 struct spdk_nvme_transport_id trid; 278 struct spdk_sock *listen_sock; 279 uint32_t ref; 280 TAILQ_ENTRY(spdk_nvmf_tcp_port) link; 281 }; 282 283 struct spdk_nvmf_tcp_transport { 284 struct spdk_nvmf_transport transport; 285 286 pthread_mutex_t lock; 287 288 TAILQ_HEAD(, spdk_nvmf_tcp_port) ports; 289 }; 290 291 static void spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport, 292 struct spdk_nvmf_tcp_qpair *tqpair); 293 static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 294 struct spdk_nvmf_tcp_req *tcp_req); 295 static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair); 296 297 static void 298 spdk_nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, 299 enum spdk_nvmf_tcp_req_state state) 300 { 301 struct spdk_nvmf_qpair *qpair; 302 struct spdk_nvmf_tcp_qpair *tqpair; 303 304 qpair = tcp_req->req.qpair; 305 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 306 307 TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 308 tqpair->state_cntr[tcp_req->state]--; 309 assert(tqpair->state_cntr[tcp_req->state] >= 0); 310 311 TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link); 312 tqpair->state_cntr[state]++; 313 314 tcp_req->state = state; 315 } 316 317 static struct nvme_tcp_pdu * 318 spdk_nvmf_tcp_pdu_get(struct spdk_nvmf_tcp_qpair *tqpair) 319 { 320 struct nvme_tcp_pdu *pdu; 321 322 pdu = TAILQ_FIRST(&tqpair->free_queue); 323 if (!pdu) { 324 SPDK_ERRLOG("Unable to get PDU for tqpair=%p\n", tqpair); 325 abort(); 326 return NULL; 327 } 328 329 tqpair->free_pdu_num--; 330 TAILQ_REMOVE(&tqpair->free_queue, pdu, tailq); 331 memset(pdu, 0, sizeof(*pdu)); 332 pdu->ref = 1; 333 334 return pdu; 335 } 336 337 static void 338 spdk_nvmf_tcp_pdu_put(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 339 { 340 if (!pdu) { 341 return; 342 } 343 344 assert(pdu->ref > 0); 345 346 pdu->ref--; 347 if (pdu->ref == 0) { 348 tqpair->free_pdu_num++; 349 TAILQ_INSERT_HEAD(&tqpair->free_queue, pdu, tailq); 350 } 351 } 352 353 static struct spdk_nvmf_tcp_req * 354 spdk_nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair) 355 { 356 struct spdk_nvmf_tcp_req *tcp_req; 357 358 tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]); 359 if (!tcp_req) { 360 SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair); 361 return NULL; 362 } 363 364 memset(&tcp_req->cmd, 0, sizeof(tcp_req->cmd)); 365 memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp)); 366 tcp_req->next_expected_r2t_offset = 0; 367 tcp_req->r2tl_remain = 0; 368 tcp_req->c2h_data_offset = 0; 369 tcp_req->has_incapsule_data = false; 370 tcp_req->dif_insert_or_strip = false; 371 372 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 373 return tcp_req; 374 } 375 376 static void 377 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req) 378 { 379 struct spdk_nvmf_tcp_transport *ttransport; 380 381 if (!tcp_req) { 382 return; 383 } 384 385 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req); 386 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 387 struct spdk_nvmf_tcp_transport, transport); 388 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 389 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 390 } 391 392 static int 393 spdk_nvmf_tcp_req_free(struct spdk_nvmf_request *req) 394 { 395 struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 396 397 nvmf_tcp_request_free(tcp_req); 398 399 return 0; 400 } 401 402 static void 403 spdk_nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair, 404 enum spdk_nvmf_tcp_req_state state) 405 { 406 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 407 408 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) { 409 nvmf_tcp_request_free(tcp_req); 410 } 411 } 412 413 static void 414 spdk_nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) 415 { 416 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 417 struct nvme_tcp_pdu *pdu, *tmp_pdu; 418 419 /* Free the pdus in the send_queue */ 420 TAILQ_FOREACH_SAFE(pdu, &tqpair->send_queue, tailq, tmp_pdu) { 421 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 422 /* Also check the pdu type, we need to calculte the c2h_data_pdu_cnt later */ 423 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) { 424 assert(tqpair->c2h_data_pdu_cnt > 0); 425 tqpair->c2h_data_pdu_cnt--; 426 } 427 spdk_nvmf_tcp_pdu_put(tqpair, pdu); 428 } 429 430 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->queued_c2h_data_tcp_req, link, req_tmp) { 431 TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link); 432 } 433 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 434 435 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW); 436 437 /* Wipe the requests waiting for buffer from the global list */ 438 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link, 439 req_tmp) { 440 TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link); 441 } 442 443 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); 444 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); 445 spdk_nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 446 } 447 448 static void 449 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair) 450 { 451 int i; 452 struct spdk_nvmf_tcp_req *tcp_req; 453 454 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid); 455 for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) { 456 SPDK_ERRLOG("\tNum of requests in state[%d] = %d\n", i, tqpair->state_cntr[i]); 457 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) { 458 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->data_from_pool); 459 SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode); 460 } 461 } 462 } 463 464 static void 465 spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair) 466 { 467 int err = 0; 468 469 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 470 471 spdk_poller_unregister(&tqpair->flush_poller); 472 spdk_sock_close(&tqpair->sock); 473 spdk_nvmf_tcp_cleanup_all_states(tqpair); 474 475 if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) { 476 SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair, 477 tqpair->free_pdu_num, 478 (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)); 479 err++; 480 } 481 482 if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) { 483 SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair, 484 tqpair->state_cntr[TCP_REQUEST_STATE_FREE], 485 tqpair->max_queue_depth); 486 err++; 487 } 488 489 if (tqpair->c2h_data_pdu_cnt != 0) { 490 SPDK_ERRLOG("tqpair(%p) free c2h_data_pdu cnt is %u but should be 0\n", tqpair, 491 tqpair->c2h_data_pdu_cnt); 492 err++; 493 } 494 495 if (err > 0) { 496 nvmf_tcp_dump_qpair_req_contents(tqpair); 497 } 498 free(tqpair->pdu); 499 free(tqpair->pdu_pool); 500 free(tqpair->req); 501 free(tqpair->reqs); 502 spdk_free(tqpair->buf); 503 spdk_free(tqpair->bufs); 504 free(tqpair); 505 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n"); 506 } 507 508 static int 509 spdk_nvmf_tcp_destroy(struct spdk_nvmf_transport *transport) 510 { 511 struct spdk_nvmf_tcp_transport *ttransport; 512 513 assert(transport != NULL); 514 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 515 516 pthread_mutex_destroy(&ttransport->lock); 517 free(ttransport); 518 return 0; 519 } 520 521 static struct spdk_nvmf_transport * 522 spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts) 523 { 524 struct spdk_nvmf_tcp_transport *ttransport; 525 uint32_t sge_count; 526 uint32_t min_shared_buffers; 527 528 ttransport = calloc(1, sizeof(*ttransport)); 529 if (!ttransport) { 530 return NULL; 531 } 532 533 TAILQ_INIT(&ttransport->ports); 534 535 ttransport->transport.ops = &spdk_nvmf_transport_tcp; 536 537 SPDK_NOTICELOG("*** TCP Transport Init ***\n"); 538 539 SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n" 540 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" 541 " max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" 542 " in_capsule_data_size=%d, max_aq_depth=%d\n" 543 " num_shared_buffers=%d, c2h_success=%d,\n" 544 " dif_insert_or_strip=%d, sock_priority=%d\n", 545 opts->max_queue_depth, 546 opts->max_io_size, 547 opts->max_qpairs_per_ctrlr, 548 opts->io_unit_size, 549 opts->in_capsule_data_size, 550 opts->max_aq_depth, 551 opts->num_shared_buffers, 552 opts->c2h_success, 553 opts->dif_insert_or_strip, 554 opts->sock_priority); 555 556 if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) { 557 SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n" 558 "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n", 559 opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY); 560 free(ttransport); 561 return NULL; 562 } 563 564 /* I/O unit size cannot be larger than max I/O size */ 565 if (opts->io_unit_size > opts->max_io_size) { 566 opts->io_unit_size = opts->max_io_size; 567 } 568 569 sge_count = opts->max_io_size / opts->io_unit_size; 570 if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) { 571 SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); 572 free(ttransport); 573 return NULL; 574 } 575 576 min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size; 577 if (min_shared_buffers > opts->num_shared_buffers) { 578 SPDK_ERRLOG("There are not enough buffers to satisfy" 579 "per-poll group caches for each thread. (%" PRIu32 ")" 580 "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); 581 SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); 582 spdk_nvmf_tcp_destroy(&ttransport->transport); 583 return NULL; 584 } 585 586 pthread_mutex_init(&ttransport->lock, NULL); 587 588 return &ttransport->transport; 589 } 590 591 static int 592 _spdk_nvmf_tcp_trsvcid_to_int(const char *trsvcid) 593 { 594 unsigned long long ull; 595 char *end = NULL; 596 597 ull = strtoull(trsvcid, &end, 10); 598 if (end == NULL || end == trsvcid || *end != '\0') { 599 return -1; 600 } 601 602 /* Valid TCP/IP port numbers are in [0, 65535] */ 603 if (ull > 65535) { 604 return -1; 605 } 606 607 return (int)ull; 608 } 609 610 /** 611 * Canonicalize a listen address trid. 612 */ 613 static int 614 _spdk_nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid, 615 const struct spdk_nvme_transport_id *trid) 616 { 617 int trsvcid_int; 618 619 trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid); 620 if (trsvcid_int < 0) { 621 return -EINVAL; 622 } 623 624 memset(canon_trid, 0, sizeof(*canon_trid)); 625 canon_trid->trtype = SPDK_NVME_TRANSPORT_TCP; 626 canon_trid->adrfam = trid->adrfam; 627 snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr); 628 snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int); 629 630 return 0; 631 } 632 633 /** 634 * Find an existing listening port. 635 * 636 * Caller must hold ttransport->lock. 637 */ 638 static struct spdk_nvmf_tcp_port * 639 _spdk_nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport, 640 const struct spdk_nvme_transport_id *trid) 641 { 642 struct spdk_nvme_transport_id canon_trid; 643 struct spdk_nvmf_tcp_port *port; 644 645 if (_spdk_nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) { 646 return NULL; 647 } 648 649 TAILQ_FOREACH(port, &ttransport->ports, link) { 650 if (spdk_nvme_transport_id_compare(&canon_trid, &port->trid) == 0) { 651 return port; 652 } 653 } 654 655 return NULL; 656 } 657 658 static int 659 spdk_nvmf_tcp_listen(struct spdk_nvmf_transport *transport, 660 const struct spdk_nvme_transport_id *trid) 661 { 662 struct spdk_nvmf_tcp_transport *ttransport; 663 struct spdk_nvmf_tcp_port *port; 664 int trsvcid_int; 665 uint8_t adrfam; 666 667 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 668 669 trsvcid_int = _spdk_nvmf_tcp_trsvcid_to_int(trid->trsvcid); 670 if (trsvcid_int < 0) { 671 SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid); 672 return -EINVAL; 673 } 674 675 pthread_mutex_lock(&ttransport->lock); 676 677 port = _spdk_nvmf_tcp_find_port(ttransport, trid); 678 if (port) { 679 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Already listening on %s port %s\n", 680 trid->traddr, trid->trsvcid); 681 port->ref++; 682 pthread_mutex_unlock(&ttransport->lock); 683 return 0; 684 } 685 686 port = calloc(1, sizeof(*port)); 687 if (!port) { 688 SPDK_ERRLOG("Port allocation failed\n"); 689 free(port); 690 pthread_mutex_unlock(&ttransport->lock); 691 return -ENOMEM; 692 } 693 694 port->ref = 1; 695 696 if (_spdk_nvmf_tcp_canon_listen_trid(&port->trid, trid) != 0) { 697 SPDK_ERRLOG("Invalid traddr %s / trsvcid %s\n", 698 trid->traddr, trid->trsvcid); 699 free(port); 700 pthread_mutex_unlock(&ttransport->lock); 701 return -ENOMEM; 702 } 703 704 port->listen_sock = spdk_sock_listen(trid->traddr, trsvcid_int); 705 if (port->listen_sock == NULL) { 706 SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n", 707 trid->traddr, trsvcid_int, 708 spdk_strerror(errno), errno); 709 free(port); 710 pthread_mutex_unlock(&ttransport->lock); 711 return -errno; 712 } 713 714 if (spdk_sock_is_ipv4(port->listen_sock)) { 715 adrfam = SPDK_NVMF_ADRFAM_IPV4; 716 } else if (spdk_sock_is_ipv6(port->listen_sock)) { 717 adrfam = SPDK_NVMF_ADRFAM_IPV6; 718 } else { 719 SPDK_ERRLOG("Unhandled socket type\n"); 720 adrfam = 0; 721 } 722 723 if (adrfam != trid->adrfam) { 724 SPDK_ERRLOG("Socket address family mismatch\n"); 725 spdk_sock_close(&port->listen_sock); 726 free(port); 727 pthread_mutex_unlock(&ttransport->lock); 728 return -EINVAL; 729 } 730 731 SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %d ***\n", 732 trid->traddr, trsvcid_int); 733 734 TAILQ_INSERT_TAIL(&ttransport->ports, port, link); 735 pthread_mutex_unlock(&ttransport->lock); 736 737 return 0; 738 } 739 740 static int 741 spdk_nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport, 742 const struct spdk_nvme_transport_id *trid) 743 { 744 struct spdk_nvmf_tcp_transport *ttransport; 745 struct spdk_nvmf_tcp_port *port; 746 int rc; 747 748 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 749 750 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n", 751 trid->traddr, trid->trsvcid); 752 753 pthread_mutex_lock(&ttransport->lock); 754 port = _spdk_nvmf_tcp_find_port(ttransport, trid); 755 if (port) { 756 assert(port->ref > 0); 757 port->ref--; 758 if (port->ref == 0) { 759 TAILQ_REMOVE(&ttransport->ports, port, link); 760 spdk_sock_close(&port->listen_sock); 761 free(port); 762 } 763 rc = 0; 764 } else { 765 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Port not found\n"); 766 rc = -ENOENT; 767 } 768 pthread_mutex_unlock(&ttransport->lock); 769 770 return rc; 771 } 772 773 static int 774 spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair) 775 { 776 const int array_size = 32; 777 struct iovec iovs[array_size]; 778 int iovcnt = 0; 779 int bytes = 0; 780 int total_length = 0; 781 uint32_t mapped_length; 782 struct nvme_tcp_pdu *pdu; 783 int pdu_length; 784 TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list; 785 struct spdk_nvmf_tcp_transport *ttransport; 786 787 pdu = TAILQ_FIRST(&tqpair->send_queue); 788 789 if (pdu == NULL) { 790 return 0; 791 } 792 793 /* 794 * Build up a list of iovecs for the first few PDUs in the 795 * tqpair 's send_queue. 796 */ 797 while (pdu != NULL && ((array_size - iovcnt) >= 3)) { 798 iovcnt += nvme_tcp_build_iovs(&iovs[iovcnt], 799 array_size - iovcnt, 800 pdu, 801 tqpair->host_hdgst_enable, 802 tqpair->host_ddgst_enable, 803 &mapped_length); 804 total_length += mapped_length; 805 pdu = TAILQ_NEXT(pdu, tailq); 806 } 807 808 spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_START, 0, total_length, 0, iovcnt); 809 810 bytes = spdk_sock_writev(tqpair->sock, iovs, iovcnt); 811 if (bytes == -1) { 812 if (errno == EWOULDBLOCK || errno == EAGAIN) { 813 return 1; 814 } else { 815 SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n", 816 errno, spdk_strerror(errno)); 817 return -1; 818 } 819 } 820 821 spdk_trace_record(TRACE_TCP_FLUSH_WRITEBUF_DONE, 0, bytes, 0, 0); 822 823 pdu = TAILQ_FIRST(&tqpair->send_queue); 824 825 /* 826 * Free any PDUs that were fully written. If a PDU was only 827 * partially written, update its writev_offset so that next 828 * time only the unwritten portion will be sent to writev(). 829 */ 830 TAILQ_INIT(&completed_pdus_list); 831 while (bytes > 0) { 832 pdu_length = pdu->hdr.common.plen - pdu->writev_offset; 833 if (bytes >= pdu_length) { 834 bytes -= pdu_length; 835 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 836 TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq); 837 pdu = TAILQ_FIRST(&tqpair->send_queue); 838 839 } else { 840 pdu->writev_offset += bytes; 841 bytes = 0; 842 } 843 } 844 845 while (!TAILQ_EMPTY(&completed_pdus_list)) { 846 pdu = TAILQ_FIRST(&completed_pdus_list); 847 TAILQ_REMOVE(&completed_pdus_list, pdu, tailq); 848 assert(pdu->cb_fn != NULL); 849 pdu->cb_fn(pdu->cb_arg); 850 spdk_nvmf_tcp_pdu_put(tqpair, pdu); 851 } 852 853 ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 854 spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair); 855 856 return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1; 857 } 858 859 static int 860 spdk_nvmf_tcp_qpair_flush_pdus(void *_tqpair) 861 { 862 struct spdk_nvmf_tcp_qpair *tqpair = _tqpair; 863 int rc; 864 865 if (tqpair->state == NVME_TCP_QPAIR_STATE_RUNNING) { 866 rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair); 867 if (rc == 0 && tqpair->flush_poller != NULL) { 868 spdk_poller_unregister(&tqpair->flush_poller); 869 } else if (rc == 1 && tqpair->flush_poller == NULL) { 870 tqpair->flush_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_flush_pdus, 871 tqpair, 50); 872 } 873 } else { 874 /* 875 * If the tqpair state is not RUNNING, then 876 * keep trying to flush PDUs until our list is 877 * empty - to make sure all data is sent before 878 * closing the connection. 879 */ 880 do { 881 rc = spdk_nvmf_tcp_qpair_flush_pdus_internal(tqpair); 882 } while (rc == 1); 883 } 884 885 if (rc < 0 && tqpair->state < NVME_TCP_QPAIR_STATE_EXITING) { 886 /* 887 * If the poller has already started destruction of the tqpair, 888 * i.e. the socket read failed, then the connection state may already 889 * be EXITED. We don't want to set it back to EXITING in that case. 890 */ 891 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 892 } 893 894 return -1; 895 } 896 897 static void 898 spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 899 struct nvme_tcp_pdu *pdu, 900 nvme_tcp_qpair_xfer_complete_cb cb_fn, 901 void *cb_arg) 902 { 903 int enable_digest; 904 int hlen; 905 uint32_t crc32c; 906 907 hlen = pdu->hdr.common.hlen; 908 enable_digest = 1; 909 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || 910 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) { 911 /* this PDU should be sent without digest */ 912 enable_digest = 0; 913 } 914 915 /* Header Digest */ 916 if (enable_digest && tqpair->host_hdgst_enable) { 917 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 918 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 919 } 920 921 /* Data Digest */ 922 if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) { 923 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 924 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 925 } 926 927 pdu->cb_fn = cb_fn; 928 pdu->cb_arg = cb_arg; 929 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 930 spdk_nvmf_tcp_qpair_flush_pdus(tqpair); 931 } 932 933 static int 934 spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size) 935 { 936 int i; 937 struct spdk_nvmf_tcp_req *tcp_req; 938 struct spdk_nvmf_transport *transport = tqpair->qpair.transport; 939 uint32_t in_capsule_data_size; 940 941 in_capsule_data_size = transport->opts.in_capsule_data_size; 942 if (transport->opts.dif_insert_or_strip) { 943 in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size); 944 } 945 946 if (!tqpair->qpair.sq_head_max) { 947 tqpair->req = calloc(1, sizeof(*tqpair->req)); 948 if (!tqpair->req) { 949 SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair); 950 return -1; 951 } 952 953 if (in_capsule_data_size) { 954 tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000, 955 NULL, SPDK_ENV_LCORE_ID_ANY, 956 SPDK_MALLOC_DMA); 957 if (!tqpair->buf) { 958 SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair); 959 return -1; 960 } 961 } 962 963 tcp_req = tqpair->req; 964 tcp_req->ttag = 0; 965 tcp_req->req.qpair = &tqpair->qpair; 966 967 /* Set up memory to receive commands */ 968 if (tqpair->buf) { 969 tcp_req->buf = tqpair->buf; 970 } 971 972 /* Set the cmdn and rsp */ 973 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; 974 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; 975 976 /* Initialize request state to FREE */ 977 tcp_req->state = TCP_REQUEST_STATE_FREE; 978 TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 979 980 tqpair->pdu = calloc(NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1, sizeof(*tqpair->pdu)); 981 if (!tqpair->pdu) { 982 SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair); 983 return -1; 984 } 985 986 for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) { 987 TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq); 988 } 989 990 } else { 991 tqpair->reqs = calloc(size, sizeof(*tqpair->reqs)); 992 if (!tqpair->reqs) { 993 SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair); 994 return -1; 995 } 996 997 if (in_capsule_data_size) { 998 tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000, 999 NULL, SPDK_ENV_LCORE_ID_ANY, 1000 SPDK_MALLOC_DMA); 1001 if (!tqpair->bufs) { 1002 SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair); 1003 return -1; 1004 } 1005 } 1006 1007 for (i = 0; i < size; i++) { 1008 struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i]; 1009 1010 tcp_req->ttag = i + 1; 1011 tcp_req->req.qpair = &tqpair->qpair; 1012 1013 /* Set up memory to receive commands */ 1014 if (tqpair->bufs) { 1015 tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size)); 1016 } 1017 1018 /* Set the cmdn and rsp */ 1019 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; 1020 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; 1021 1022 /* Initialize request state to FREE */ 1023 tcp_req->state = TCP_REQUEST_STATE_FREE; 1024 TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 1025 } 1026 1027 tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool)); 1028 if (!tqpair->pdu_pool) { 1029 SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair); 1030 return -1; 1031 } 1032 1033 for (i = 0; i < size; i++) { 1034 TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq); 1035 } 1036 } 1037 1038 return 0; 1039 } 1040 1041 static int 1042 spdk_nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair) 1043 { 1044 struct spdk_nvmf_tcp_qpair *tqpair; 1045 int i; 1046 1047 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1048 1049 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair); 1050 1051 TAILQ_INIT(&tqpair->send_queue); 1052 TAILQ_INIT(&tqpair->free_queue); 1053 TAILQ_INIT(&tqpair->queued_c2h_data_tcp_req); 1054 1055 /* Initialise request state queues of the qpair */ 1056 for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) { 1057 TAILQ_INIT(&tqpair->state_queue[i]); 1058 } 1059 1060 tqpair->host_hdgst_enable = true; 1061 tqpair->host_ddgst_enable = true; 1062 1063 return 0; 1064 } 1065 1066 static int 1067 spdk_nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair) 1068 { 1069 1070 int rc; 1071 int buf_size; 1072 1073 /* set recv buffer size */ 1074 buf_size = 2 * 1024 * 1024; 1075 rc = spdk_sock_set_recvbuf(tqpair->sock, buf_size); 1076 if (rc != 0) { 1077 SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n"); 1078 return rc; 1079 } 1080 1081 /* set send buffer size */ 1082 rc = spdk_sock_set_sendbuf(tqpair->sock, buf_size); 1083 if (rc != 0) { 1084 SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n"); 1085 return rc; 1086 } 1087 1088 /* set low water mark */ 1089 rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_c2h_data_hdr)); 1090 if (rc != 0) { 1091 SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n"); 1092 return rc; 1093 } 1094 1095 return 0; 1096 } 1097 1098 static void 1099 _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport, 1100 struct spdk_nvmf_tcp_port *port, 1101 struct spdk_sock *sock, new_qpair_fn cb_fn) 1102 { 1103 struct spdk_nvmf_tcp_qpair *tqpair; 1104 int rc; 1105 1106 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n", 1107 port->trid.traddr, port->trid.trsvcid); 1108 1109 if (transport->opts.sock_priority) { 1110 rc = spdk_sock_set_priority(sock, transport->opts.sock_priority); 1111 if (rc) { 1112 SPDK_ERRLOG("Failed to set the priority of the socket\n"); 1113 spdk_sock_close(&sock); 1114 return; 1115 } 1116 } 1117 1118 tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair)); 1119 if (tqpair == NULL) { 1120 SPDK_ERRLOG("Could not allocate new connection.\n"); 1121 spdk_sock_close(&sock); 1122 return; 1123 } 1124 1125 tqpair->sock = sock; 1126 tqpair->max_queue_depth = 1; 1127 tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; 1128 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth; 1129 tqpair->port = port; 1130 tqpair->qpair.transport = transport; 1131 1132 rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr, 1133 sizeof(tqpair->target_addr), &tqpair->target_port, 1134 tqpair->initiator_addr, sizeof(tqpair->initiator_addr), 1135 &tqpair->initiator_port); 1136 if (rc < 0) { 1137 SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair); 1138 spdk_nvmf_tcp_qpair_destroy(tqpair); 1139 return; 1140 } 1141 1142 cb_fn(&tqpair->qpair); 1143 } 1144 1145 static void 1146 spdk_nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port, 1147 new_qpair_fn cb_fn) 1148 { 1149 struct spdk_sock *sock; 1150 int i; 1151 1152 for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) { 1153 sock = spdk_sock_accept(port->listen_sock); 1154 if (sock) { 1155 _spdk_nvmf_tcp_handle_connect(transport, port, sock, cb_fn); 1156 } 1157 } 1158 } 1159 1160 static void 1161 spdk_nvmf_tcp_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) 1162 { 1163 struct spdk_nvmf_tcp_transport *ttransport; 1164 struct spdk_nvmf_tcp_port *port; 1165 1166 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1167 1168 TAILQ_FOREACH(port, &ttransport->ports, link) { 1169 spdk_nvmf_tcp_port_accept(transport, port, cb_fn); 1170 } 1171 } 1172 1173 static void 1174 spdk_nvmf_tcp_discover(struct spdk_nvmf_transport *transport, 1175 struct spdk_nvme_transport_id *trid, 1176 struct spdk_nvmf_discovery_log_page_entry *entry) 1177 { 1178 entry->trtype = SPDK_NVMF_TRTYPE_TCP; 1179 entry->adrfam = trid->adrfam; 1180 entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED; 1181 1182 spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); 1183 spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); 1184 1185 entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE; 1186 } 1187 1188 static struct spdk_nvmf_transport_poll_group * 1189 spdk_nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport) 1190 { 1191 struct spdk_nvmf_tcp_poll_group *tgroup; 1192 1193 tgroup = calloc(1, sizeof(*tgroup)); 1194 if (!tgroup) { 1195 return NULL; 1196 } 1197 1198 tgroup->sock_group = spdk_sock_group_create(&tgroup->group); 1199 if (!tgroup->sock_group) { 1200 goto cleanup; 1201 } 1202 1203 TAILQ_INIT(&tgroup->qpairs); 1204 TAILQ_INIT(&tgroup->pending_data_buf_queue); 1205 1206 return &tgroup->group; 1207 1208 cleanup: 1209 free(tgroup); 1210 return NULL; 1211 } 1212 1213 static struct spdk_nvmf_transport_poll_group * 1214 spdk_nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) 1215 { 1216 struct spdk_nvmf_tcp_qpair *tqpair; 1217 struct spdk_sock_group *group = NULL; 1218 int rc; 1219 1220 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1221 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group); 1222 if (!rc && group != NULL) { 1223 return spdk_sock_group_get_ctx(group); 1224 } 1225 1226 return NULL; 1227 } 1228 1229 static void 1230 spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1231 { 1232 struct spdk_nvmf_tcp_poll_group *tgroup; 1233 1234 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 1235 spdk_sock_group_close(&tgroup->sock_group); 1236 1237 if (!TAILQ_EMPTY(&tgroup->pending_data_buf_queue)) { 1238 SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n"); 1239 } 1240 1241 free(tgroup); 1242 } 1243 1244 static void 1245 spdk_nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 1246 enum nvme_tcp_pdu_recv_state state) 1247 { 1248 if (tqpair->recv_state == state) { 1249 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 1250 tqpair, state); 1251 return; 1252 } 1253 1254 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state); 1255 tqpair->recv_state = state; 1256 1257 switch (state) { 1258 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1259 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1260 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1261 break; 1262 case NVME_TCP_PDU_RECV_STATE_ERROR: 1263 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1264 memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress)); 1265 break; 1266 default: 1267 SPDK_ERRLOG("The state(%d) is invalid\n", state); 1268 abort(); 1269 break; 1270 } 1271 } 1272 1273 static int 1274 spdk_nvmf_tcp_qpair_handle_timeout(void *ctx) 1275 { 1276 struct spdk_nvmf_tcp_qpair *tqpair = ctx; 1277 1278 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 1279 1280 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, 1281 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); 1282 tqpair->state = NVME_TCP_QPAIR_STATE_EXITED; 1283 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconect the tqpair=%p\n", tqpair); 1284 spdk_poller_unregister(&tqpair->timeout_poller); 1285 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 1286 1287 return 0; 1288 } 1289 1290 static void 1291 spdk_nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) 1292 { 1293 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; 1294 1295 if (!tqpair->timeout_poller) { 1296 tqpair->timeout_poller = spdk_poller_register(spdk_nvmf_tcp_qpair_handle_timeout, tqpair, 1297 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); 1298 } 1299 } 1300 1301 static void 1302 spdk_nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1303 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1304 { 1305 struct nvme_tcp_pdu *rsp_pdu; 1306 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; 1307 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); 1308 uint32_t copy_len; 1309 1310 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1311 if (!rsp_pdu) { 1312 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1313 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1314 return; 1315 } 1316 1317 c2h_term_req = &rsp_pdu->hdr.term_req; 1318 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; 1319 c2h_term_req->common.hlen = c2h_term_req_hdr_len; 1320 1321 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1322 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1323 DSET32(&c2h_term_req->fei, error_offset); 1324 } 1325 1326 copy_len = pdu->hdr.common.hlen; 1327 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 1328 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 1329 } 1330 1331 /* Copy the error info into the buffer */ 1332 memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len); 1333 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len); 1334 1335 /* Contain the header of the wrong received pdu */ 1336 c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len; 1337 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1338 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_c2h_term_req_complete, tqpair); 1339 } 1340 1341 static void 1342 spdk_nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1343 struct spdk_nvmf_tcp_qpair *tqpair, 1344 struct nvme_tcp_pdu *pdu) 1345 { 1346 struct spdk_nvmf_tcp_req *tcp_req; 1347 1348 tcp_req = spdk_nvmf_tcp_req_get(tqpair); 1349 if (!tcp_req) { 1350 SPDK_ERRLOG("Cannot allocate tcp_req\n"); 1351 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1352 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1353 return; 1354 } 1355 1356 pdu->ctx = tcp_req; 1357 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 1358 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 1359 return; 1360 } 1361 1362 static void 1363 spdk_nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1364 struct spdk_nvmf_tcp_qpair *tqpair, 1365 struct nvme_tcp_pdu *pdu) 1366 { 1367 struct spdk_nvmf_tcp_req *tcp_req; 1368 struct spdk_nvme_tcp_cmd *capsule_cmd; 1369 uint32_t error_offset = 0; 1370 enum spdk_nvme_tcp_term_req_fes fes; 1371 1372 capsule_cmd = &pdu->hdr.capsule_cmd; 1373 tcp_req = pdu->ctx; 1374 assert(tcp_req != NULL); 1375 if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) { 1376 SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n", 1377 SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo); 1378 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1379 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1380 goto err; 1381 } 1382 1383 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1384 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1385 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 1386 1387 return; 1388 err: 1389 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1390 } 1391 1392 static void 1393 spdk_nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1394 struct spdk_nvmf_tcp_qpair *tqpair, 1395 struct nvme_tcp_pdu *pdu) 1396 { 1397 struct spdk_nvmf_tcp_req *tcp_req; 1398 uint32_t error_offset = 0; 1399 enum spdk_nvme_tcp_term_req_fes fes = 0; 1400 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1401 bool ttag_offset_error = false; 1402 1403 h2c_data = &pdu->hdr.h2c_data; 1404 1405 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n", 1406 tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag); 1407 1408 /* According to the information in the pdu to find the req */ 1409 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER], 1410 state_link) { 1411 if ((tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid) && (tcp_req->ttag == h2c_data->ttag)) { 1412 break; 1413 } 1414 1415 if (!ttag_offset_error && (tcp_req->req.cmd->nvme_cmd.cid == h2c_data->cccid)) { 1416 ttag_offset_error = true; 1417 } 1418 } 1419 1420 if (!tcp_req) { 1421 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair); 1422 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 1423 if (!ttag_offset_error) { 1424 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid); 1425 } else { 1426 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); 1427 } 1428 goto err; 1429 } 1430 1431 if (tcp_req->next_expected_r2t_offset != h2c_data->datao) { 1432 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 1433 "tcp_req(%p), tqpair=%p, expected_r2t_offset=%u, but data offset =%u\n", 1434 tcp_req, tqpair, tcp_req->next_expected_r2t_offset, h2c_data->datao); 1435 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1436 goto err; 1437 } 1438 1439 if (h2c_data->datal > tqpair->maxh2cdata) { 1440 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req(%p), tqpair=%p, datao=%u execeeds maxh2cdata size=%u\n", 1441 tcp_req, tqpair, h2c_data->datao, tqpair->maxh2cdata); 1442 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1443 goto err; 1444 } 1445 1446 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { 1447 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 1448 "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) execeeds requested length=%u\n", 1449 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); 1450 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1451 goto err; 1452 } 1453 1454 pdu->ctx = tcp_req; 1455 1456 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 1457 pdu->dif_ctx = &tcp_req->dif_ctx; 1458 } 1459 1460 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 1461 h2c_data->datao, h2c_data->datal); 1462 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1463 return; 1464 1465 err: 1466 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1467 } 1468 1469 static void 1470 spdk_nvmf_tcp_pdu_cmd_complete(void *cb_arg) 1471 { 1472 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1473 nvmf_tcp_request_free(tcp_req); 1474 } 1475 1476 static void 1477 spdk_nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, 1478 struct spdk_nvmf_tcp_qpair *tqpair) 1479 { 1480 struct nvme_tcp_pdu *rsp_pdu; 1481 struct spdk_nvme_tcp_rsp *capsule_resp; 1482 1483 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair); 1484 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1485 if (!rsp_pdu) { 1486 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1487 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1488 return; 1489 } 1490 1491 capsule_resp = &rsp_pdu->hdr.capsule_resp; 1492 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1493 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); 1494 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; 1495 if (tqpair->host_hdgst_enable) { 1496 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1497 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1498 } 1499 1500 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, tcp_req); 1501 } 1502 1503 static void 1504 spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) 1505 { 1506 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1507 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, 1508 struct spdk_nvmf_tcp_qpair, qpair); 1509 1510 assert(tqpair != NULL); 1511 assert(tcp_req->c2h_data_pdu_num > 0); 1512 tcp_req->c2h_data_pdu_num--; 1513 if (!tcp_req->c2h_data_pdu_num) { 1514 if (tqpair->qpair.transport->opts.c2h_success) { 1515 nvmf_tcp_request_free(tcp_req); 1516 } else { 1517 spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 1518 } 1519 } 1520 1521 tqpair->c2h_data_pdu_cnt--; 1522 spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair); 1523 } 1524 1525 static void 1526 spdk_nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1527 struct spdk_nvmf_tcp_req *tcp_req) 1528 { 1529 struct nvme_tcp_pdu *rsp_pdu; 1530 struct spdk_nvme_tcp_r2t_hdr *r2t; 1531 1532 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1533 if (!rsp_pdu) { 1534 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1535 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1536 return; 1537 } 1538 1539 r2t = &rsp_pdu->hdr.r2t; 1540 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; 1541 r2t->common.plen = r2t->common.hlen = sizeof(*r2t); 1542 1543 if (tqpair->host_hdgst_enable) { 1544 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1545 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1546 } 1547 1548 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; 1549 r2t->ttag = tcp_req->ttag; 1550 r2t->r2to = tcp_req->next_expected_r2t_offset; 1551 r2t->r2tl = spdk_min(tcp_req->req.length - tcp_req->next_expected_r2t_offset, tqpair->maxh2cdata); 1552 tcp_req->r2tl_remain = r2t->r2tl; 1553 1554 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 1555 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", 1556 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); 1557 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_cmd_complete, NULL); 1558 } 1559 1560 static void 1561 spdk_nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1562 struct spdk_nvmf_tcp_qpair *tqpair, 1563 struct nvme_tcp_pdu *pdu) 1564 { 1565 struct spdk_nvmf_tcp_req *tcp_req; 1566 1567 tcp_req = pdu->ctx; 1568 assert(tcp_req != NULL); 1569 1570 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 1571 1572 tcp_req->next_expected_r2t_offset += pdu->data_len; 1573 tcp_req->r2tl_remain -= pdu->data_len; 1574 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1575 1576 if (!tcp_req->r2tl_remain) { 1577 if (tcp_req->next_expected_r2t_offset == tcp_req->req.length) { 1578 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1579 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 1580 } else { 1581 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Send r2t pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair); 1582 spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 1583 } 1584 } 1585 } 1586 1587 static void 1588 spdk_nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) 1589 { 1590 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, 1591 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); 1592 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1593 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1594 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n", 1595 DGET32(h2c_term_req->fei)); 1596 } 1597 } 1598 1599 static void 1600 spdk_nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1601 struct nvme_tcp_pdu *pdu) 1602 { 1603 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1604 uint32_t error_offset = 0; 1605 enum spdk_nvme_tcp_term_req_fes fes; 1606 1607 1608 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1609 SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for h2c_term_req pdu=%p\n", pdu); 1610 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1611 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1612 goto end; 1613 } 1614 1615 /* set the data buffer */ 1616 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, 1617 h2c_term_req->common.plen - h2c_term_req->common.hlen); 1618 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1619 return; 1620 end: 1621 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1622 return; 1623 } 1624 1625 static void 1626 spdk_nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1627 struct nvme_tcp_pdu *pdu) 1628 { 1629 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1630 1631 spdk_nvmf_tcp_h2c_term_req_dump(h2c_term_req); 1632 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1633 return; 1634 } 1635 1636 static void 1637 spdk_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1638 { 1639 int rc = 0; 1640 struct nvme_tcp_pdu *pdu; 1641 uint32_t crc32c, error_offset = 0; 1642 enum spdk_nvme_tcp_term_req_fes fes; 1643 struct spdk_nvmf_tcp_transport *ttransport; 1644 1645 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1646 pdu = &tqpair->pdu_in_progress; 1647 1648 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 1649 /* check data digest if need */ 1650 if (pdu->ddgst_enable) { 1651 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1652 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1653 if (rc == 0) { 1654 SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1655 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1656 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1657 return; 1658 1659 } 1660 } 1661 1662 ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 1663 switch (pdu->hdr.common.pdu_type) { 1664 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1665 spdk_nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu); 1666 break; 1667 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1668 spdk_nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu); 1669 break; 1670 1671 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1672 spdk_nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu); 1673 break; 1674 1675 default: 1676 /* The code should not go to here */ 1677 SPDK_ERRLOG("The code should not go to here\n"); 1678 break; 1679 } 1680 } 1681 1682 static void 1683 spdk_nvmf_tcp_send_icresp_complete(void *cb_arg) 1684 { 1685 struct spdk_nvmf_tcp_qpair *tqpair = cb_arg; 1686 1687 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 1688 } 1689 1690 static void 1691 spdk_nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport, 1692 struct spdk_nvmf_tcp_qpair *tqpair, 1693 struct nvme_tcp_pdu *pdu) 1694 { 1695 struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; 1696 struct nvme_tcp_pdu *rsp_pdu; 1697 struct spdk_nvme_tcp_ic_resp *ic_resp; 1698 uint32_t error_offset = 0; 1699 enum spdk_nvme_tcp_term_req_fes fes; 1700 1701 /* Only PFV 0 is defined currently */ 1702 if (ic_req->pfv != 0) { 1703 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); 1704 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1705 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv); 1706 goto end; 1707 } 1708 1709 /* MAXR2T is 0's based */ 1710 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); 1711 1712 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; 1713 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; 1714 1715 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); 1716 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); 1717 1718 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 1719 if (!rsp_pdu) { 1720 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1721 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1722 return; 1723 } 1724 1725 ic_resp = &rsp_pdu->hdr.ic_resp; 1726 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; 1727 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); 1728 ic_resp->pfv = 0; 1729 ic_resp->cpda = tqpair->cpda; 1730 tqpair->maxh2cdata = spdk_min(NVMF_TCP_PDU_MAX_H2C_DATA_SIZE, 1731 ttransport->transport.opts.io_unit_size); 1732 ic_resp->maxh2cdata = tqpair->maxh2cdata; 1733 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; 1734 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0; 1735 1736 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); 1737 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); 1738 1739 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_send_icresp_complete, tqpair); 1740 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1741 return; 1742 end: 1743 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1744 return; 1745 } 1746 1747 static void 1748 spdk_nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1749 { 1750 struct nvme_tcp_pdu *pdu; 1751 int rc; 1752 uint32_t crc32c, error_offset = 0; 1753 enum spdk_nvme_tcp_term_req_fes fes; 1754 struct spdk_nvmf_tcp_transport *ttransport; 1755 1756 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1757 pdu = &tqpair->pdu_in_progress; 1758 1759 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair, 1760 pdu->hdr.common.pdu_type); 1761 /* check header digest if needed */ 1762 if (pdu->has_hdgst) { 1763 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); 1764 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1765 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1766 if (rc == 0) { 1767 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1768 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1769 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1770 return; 1771 1772 } 1773 } 1774 1775 ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 1776 switch (pdu->hdr.common.pdu_type) { 1777 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: 1778 spdk_nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); 1779 break; 1780 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1781 spdk_nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); 1782 break; 1783 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1784 spdk_nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); 1785 break; 1786 1787 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1788 spdk_nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); 1789 break; 1790 1791 default: 1792 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type); 1793 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1794 error_offset = 1; 1795 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1796 break; 1797 } 1798 } 1799 1800 static void 1801 spdk_nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1802 { 1803 struct nvme_tcp_pdu *pdu; 1804 uint32_t error_offset = 0; 1805 enum spdk_nvme_tcp_term_req_fes fes; 1806 uint8_t expected_hlen, pdo; 1807 bool plen_error = false, pdo_error = false; 1808 1809 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1810 pdu = &tqpair->pdu_in_progress; 1811 1812 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { 1813 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 1814 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); 1815 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1816 goto err; 1817 } 1818 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); 1819 if (pdu->hdr.common.plen != expected_hlen) { 1820 plen_error = true; 1821 } 1822 } else { 1823 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 1824 SPDK_ERRLOG("The TCP/IP connection is not negotitated\n"); 1825 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1826 goto err; 1827 } 1828 1829 switch (pdu->hdr.common.pdu_type) { 1830 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1831 expected_hlen = sizeof(struct spdk_nvme_tcp_cmd); 1832 pdo = pdu->hdr.common.pdo; 1833 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { 1834 pdo_error = true; 1835 break; 1836 } 1837 1838 if (pdu->hdr.common.plen < expected_hlen) { 1839 plen_error = true; 1840 } 1841 break; 1842 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1843 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); 1844 pdo = pdu->hdr.common.pdo; 1845 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { 1846 pdo_error = true; 1847 break; 1848 } 1849 if (pdu->hdr.common.plen < expected_hlen) { 1850 plen_error = true; 1851 } 1852 break; 1853 1854 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1855 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 1856 if ((pdu->hdr.common.plen <= expected_hlen) || 1857 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 1858 plen_error = true; 1859 } 1860 break; 1861 1862 default: 1863 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); 1864 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1865 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 1866 goto err; 1867 } 1868 } 1869 1870 if (pdu->hdr.common.hlen != expected_hlen) { 1871 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", 1872 pdu->hdr.common.pdu_type, 1873 expected_hlen, pdu->hdr.common.hlen, tqpair); 1874 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1875 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 1876 goto err; 1877 } else if (pdo_error) { 1878 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1879 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1880 } else if (plen_error) { 1881 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1882 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 1883 goto err; 1884 } else { 1885 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1886 return; 1887 } 1888 err: 1889 spdk_nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1890 } 1891 1892 static int 1893 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset, 1894 int read_len) 1895 { 1896 int rc; 1897 1898 rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 1899 read_offset, read_len, pdu->dif_ctx); 1900 if (rc != 0) { 1901 SPDK_ERRLOG("DIF generate failed\n"); 1902 } 1903 1904 return rc; 1905 } 1906 1907 #define MAX_NVME_TCP_PDU_LOOP_COUNT 32 1908 1909 static int 1910 spdk_nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) 1911 { 1912 int rc = 0; 1913 struct nvme_tcp_pdu *pdu; 1914 enum nvme_tcp_pdu_recv_state prev_state; 1915 uint32_t data_len, current_pdu_num = 0; 1916 uint8_t psh_len, pdo, hlen; 1917 int8_t padding_len; 1918 1919 /* The loop here is to allow for several back-to-back state changes. */ 1920 do { 1921 prev_state = tqpair->recv_state; 1922 1923 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); 1924 1925 switch (tqpair->recv_state) { 1926 /* Wait for the common header */ 1927 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1928 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1929 pdu = &tqpair->pdu_in_progress; 1930 1931 rc = nvme_tcp_read_data(tqpair->sock, 1932 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1933 (void *)&pdu->hdr.common + pdu->ch_valid_bytes); 1934 if (rc < 0) { 1935 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair); 1936 return NVME_TCP_PDU_FATAL; 1937 } else if (rc > 0) { 1938 pdu->ch_valid_bytes += rc; 1939 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0); 1940 if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) { 1941 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1942 } 1943 } 1944 1945 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1946 return NVME_TCP_PDU_IN_PROGRESS; 1947 } 1948 1949 /* The command header of this PDU has now been read from the socket. */ 1950 spdk_nvmf_tcp_pdu_ch_handle(tqpair); 1951 break; 1952 /* Wait for the pdu specific header */ 1953 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1954 pdu = &tqpair->pdu_in_progress; 1955 psh_len = hlen = pdu->hdr.common.hlen; 1956 /* Only capsule_cmd and h2c_data has header digest */ 1957 if (((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD) || 1958 (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_DATA)) && 1959 tqpair->host_hdgst_enable) { 1960 pdu->has_hdgst = true; 1961 psh_len += SPDK_NVME_TCP_DIGEST_LEN; 1962 if (pdu->hdr.common.plen > psh_len) { 1963 pdo = pdu->hdr.common.pdo; 1964 padding_len = pdo - psh_len; 1965 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "padding length is =%d for pdu=%p on tqpair=%p\n", padding_len, 1966 pdu, tqpair); 1967 if (padding_len > 0) { 1968 psh_len = pdo; 1969 } 1970 } 1971 } 1972 1973 psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr); 1974 /* The following will read psh + hdgest (if possbile) + padding (if posssible) */ 1975 if (pdu->psh_valid_bytes < psh_len) { 1976 rc = nvme_tcp_read_data(tqpair->sock, 1977 psh_len - pdu->psh_valid_bytes, 1978 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1979 if (rc < 0) { 1980 return NVME_TCP_PDU_FATAL; 1981 } else if (rc > 0) { 1982 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 1983 0, rc, 0, 0); 1984 pdu->psh_valid_bytes += rc; 1985 } 1986 if (pdu->psh_valid_bytes < psh_len) { 1987 return NVME_TCP_PDU_IN_PROGRESS; 1988 } 1989 } 1990 1991 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1992 spdk_nvmf_tcp_pdu_psh_handle(tqpair); 1993 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) { 1994 current_pdu_num++; 1995 } 1996 break; 1997 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1998 pdu = &tqpair->pdu_in_progress; 1999 2000 /* check whether the data is valid, if not we just return */ 2001 if (!pdu->data_len) { 2002 return NVME_TCP_PDU_IN_PROGRESS; 2003 } 2004 2005 data_len = pdu->data_len; 2006 /* data digest */ 2007 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && 2008 tqpair->host_ddgst_enable)) { 2009 data_len += SPDK_NVME_TCP_DIGEST_LEN; 2010 pdu->ddgst_enable = true; 2011 } 2012 2013 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 2014 if (rc < 0) { 2015 return NVME_TCP_PDU_IN_PROGRESS; 2016 } 2017 pdu->readv_offset += rc; 2018 2019 if (spdk_unlikely(pdu->dif_ctx != NULL)) { 2020 rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc); 2021 if (rc != 0) { 2022 return NVME_TCP_PDU_FATAL; 2023 } 2024 } 2025 2026 if (pdu->readv_offset < data_len) { 2027 return NVME_TCP_PDU_IN_PROGRESS; 2028 } 2029 2030 /* All of this PDU has now been read from the socket. */ 2031 spdk_nvmf_tcp_pdu_payload_handle(tqpair); 2032 current_pdu_num++; 2033 break; 2034 case NVME_TCP_PDU_RECV_STATE_ERROR: 2035 pdu = &tqpair->pdu_in_progress; 2036 /* Check whether the connection is closed. Each time, we only read 1 byte every time */ 2037 rc = nvme_tcp_read_data(tqpair->sock, 1, (void *)&pdu->hdr.common); 2038 if (rc < 0) { 2039 return NVME_TCP_PDU_FATAL; 2040 } 2041 break; 2042 default: 2043 assert(0); 2044 SPDK_ERRLOG("code should not come to here"); 2045 break; 2046 } 2047 } while ((tqpair->recv_state != prev_state) && (current_pdu_num < MAX_NVME_TCP_PDU_LOOP_COUNT)); 2048 2049 return rc; 2050 } 2051 2052 static enum spdk_nvme_data_transfer 2053 spdk_nvmf_tcp_req_get_xfer(struct spdk_nvmf_tcp_req *tcp_req) { 2054 enum spdk_nvme_data_transfer xfer; 2055 struct spdk_nvme_cmd *cmd = &tcp_req->req.cmd->nvme_cmd; 2056 struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; 2057 2058 /* Figure out data transfer direction */ 2059 if (cmd->opc == SPDK_NVME_OPC_FABRIC) 2060 { 2061 xfer = spdk_nvme_opc_get_data_transfer(tcp_req->req.cmd->nvmf_cmd.fctype); 2062 } else 2063 { 2064 xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 2065 2066 /* Some admin commands are special cases */ 2067 if ((tcp_req->req.qpair->qid == 0) && 2068 ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) || 2069 (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) { 2070 switch (cmd->cdw10 & 0xff) { 2071 case SPDK_NVME_FEAT_LBA_RANGE_TYPE: 2072 case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: 2073 case SPDK_NVME_FEAT_HOST_IDENTIFIER: 2074 break; 2075 default: 2076 xfer = SPDK_NVME_DATA_NONE; 2077 } 2078 } 2079 } 2080 2081 if (xfer == SPDK_NVME_DATA_NONE) 2082 { 2083 return xfer; 2084 } 2085 2086 /* Even for commands that may transfer data, they could have specified 0 length. 2087 * We want those to show up with xfer SPDK_NVME_DATA_NONE. 2088 */ 2089 switch (sgl->generic.type) 2090 { 2091 case SPDK_NVME_SGL_TYPE_DATA_BLOCK: 2092 case SPDK_NVME_SGL_TYPE_BIT_BUCKET: 2093 case SPDK_NVME_SGL_TYPE_SEGMENT: 2094 case SPDK_NVME_SGL_TYPE_LAST_SEGMENT: 2095 case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK: 2096 if (sgl->unkeyed.length == 0) { 2097 xfer = SPDK_NVME_DATA_NONE; 2098 } 2099 break; 2100 case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK: 2101 if (sgl->keyed.length == 0) { 2102 xfer = SPDK_NVME_DATA_NONE; 2103 } 2104 break; 2105 } 2106 2107 return xfer; 2108 } 2109 2110 static void 2111 spdk_nvmf_tcp_request_free_buffers(struct spdk_nvmf_tcp_req *tcp_req, 2112 struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport) 2113 { 2114 for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) { 2115 assert(tcp_req->buffers[i] != NULL); 2116 if (group->buf_cache_count < group->buf_cache_size) { 2117 STAILQ_INSERT_HEAD(&group->buf_cache, 2118 (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link); 2119 group->buf_cache_count++; 2120 } else { 2121 spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]); 2122 } 2123 tcp_req->req.iov[i].iov_base = NULL; 2124 tcp_req->buffers[i] = NULL; 2125 tcp_req->req.iov[i].iov_len = 0; 2126 } 2127 tcp_req->data_from_pool = false; 2128 } 2129 2130 static int 2131 spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport, 2132 struct spdk_nvmf_tcp_req *tcp_req, uint32_t length) 2133 { 2134 void *buf = NULL; 2135 uint32_t i = 0; 2136 struct spdk_nvmf_tcp_qpair *tqpair; 2137 struct spdk_nvmf_transport_poll_group *group; 2138 2139 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2140 group = &tqpair->group->group; 2141 2142 tcp_req->req.iovcnt = 0; 2143 while (length) { 2144 if (!(STAILQ_EMPTY(&group->buf_cache))) { 2145 group->buf_cache_count--; 2146 buf = STAILQ_FIRST(&group->buf_cache); 2147 STAILQ_REMOVE_HEAD(&group->buf_cache, link); 2148 } else { 2149 buf = spdk_mempool_get(ttransport->transport.data_buf_pool); 2150 if (!buf) { 2151 goto nomem; 2152 } 2153 } 2154 2155 tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) & 2156 ~NVMF_DATA_BUFFER_MASK); 2157 tcp_req->req.iov[i].iov_len = spdk_min(length, ttransport->transport.opts.io_unit_size); 2158 tcp_req->req.iovcnt++; 2159 tcp_req->buffers[i] = buf; 2160 length -= tcp_req->req.iov[i].iov_len; 2161 i++; 2162 } 2163 2164 assert(tcp_req->req.iovcnt <= SPDK_NVMF_MAX_SGL_ENTRIES); 2165 tcp_req->data_from_pool = true; 2166 return 0; 2167 2168 nomem: 2169 spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport); 2170 tcp_req->req.iovcnt = 0; 2171 return -ENOMEM; 2172 } 2173 2174 static int 2175 spdk_nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_transport *ttransport, 2176 struct spdk_nvmf_tcp_req *tcp_req) 2177 { 2178 struct spdk_nvme_cmd *cmd; 2179 struct spdk_nvme_cpl *rsp; 2180 struct spdk_nvme_sgl_descriptor *sgl; 2181 uint32_t length; 2182 2183 cmd = &tcp_req->req.cmd->nvme_cmd; 2184 rsp = &tcp_req->req.rsp->nvme_cpl; 2185 sgl = &cmd->dptr.sgl1; 2186 2187 length = sgl->unkeyed.length; 2188 2189 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && 2190 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { 2191 if (length > ttransport->transport.opts.max_io_size) { 2192 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 2193 length, ttransport->transport.opts.max_io_size); 2194 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 2195 return -1; 2196 } 2197 2198 /* fill request length and populate iovs */ 2199 tcp_req->req.length = length; 2200 2201 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length); 2202 2203 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2204 length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx); 2205 tcp_req->elba_length = length; 2206 } 2207 2208 if (spdk_nvmf_tcp_req_fill_iovs(ttransport, tcp_req, length) < 0) { 2209 /* No available buffers. Queue this request up. */ 2210 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n", 2211 tcp_req); 2212 return 0; 2213 } 2214 2215 /* backward compatible */ 2216 tcp_req->req.data = tcp_req->req.iov[0].iov_base; 2217 2218 2219 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer/s from central pool, and data=%p\n", 2220 tcp_req, 2221 tcp_req->req.iovcnt, tcp_req->req.data); 2222 2223 return 0; 2224 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 2225 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 2226 uint64_t offset = sgl->address; 2227 uint32_t max_len = ttransport->transport.opts.in_capsule_data_size; 2228 2229 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 2230 offset, length); 2231 2232 if (offset > max_len) { 2233 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", 2234 offset, max_len); 2235 rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; 2236 return -1; 2237 } 2238 max_len -= (uint32_t)offset; 2239 2240 if (length > max_len) { 2241 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 2242 length, max_len); 2243 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 2244 return -1; 2245 } 2246 2247 tcp_req->req.data = tcp_req->buf + offset; 2248 tcp_req->data_from_pool = false; 2249 tcp_req->req.length = length; 2250 2251 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2252 length = spdk_dif_get_length_with_md(length, &tcp_req->dif_ctx); 2253 tcp_req->elba_length = length; 2254 } 2255 2256 tcp_req->req.iov[0].iov_base = tcp_req->req.data; 2257 tcp_req->req.iov[0].iov_len = length; 2258 tcp_req->req.iovcnt = 1; 2259 2260 return 0; 2261 } 2262 2263 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 2264 sgl->generic.type, sgl->generic.subtype); 2265 rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; 2266 return -1; 2267 } 2268 2269 static int 2270 nvmf_tcp_pdu_verify_dif(struct nvme_tcp_pdu *pdu, 2271 const struct spdk_dif_ctx *dif_ctx) 2272 { 2273 struct spdk_dif_error err_blk = {}; 2274 int rc; 2275 2276 rc = spdk_dif_verify_stream(pdu->data_iov, pdu->data_iovcnt, 2277 0, pdu->data_len, pdu->dif_ctx, &err_blk); 2278 if (rc != 0) { 2279 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2280 err_blk.err_type, err_blk.err_offset); 2281 } 2282 2283 return rc; 2284 } 2285 2286 static void 2287 spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2288 struct spdk_nvmf_tcp_req *tcp_req) 2289 { 2290 struct nvme_tcp_pdu *rsp_pdu; 2291 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 2292 uint32_t plen, pdo, alignment; 2293 int rc; 2294 2295 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 2296 2297 rsp_pdu = spdk_nvmf_tcp_pdu_get(tqpair); 2298 assert(rsp_pdu != NULL); 2299 2300 c2h_data = &rsp_pdu->hdr.c2h_data; 2301 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; 2302 plen = c2h_data->common.hlen = sizeof(*c2h_data); 2303 2304 if (tqpair->host_hdgst_enable) { 2305 plen += SPDK_NVME_TCP_DIGEST_LEN; 2306 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2307 } 2308 2309 /* set the psh */ 2310 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2311 c2h_data->datal = spdk_min(NVMF_TCP_PDU_MAX_C2H_DATA_SIZE, 2312 tcp_req->req.length - tcp_req->c2h_data_offset); 2313 c2h_data->datao = tcp_req->c2h_data_offset; 2314 2315 /* set the padding */ 2316 rsp_pdu->padding_len = 0; 2317 pdo = plen; 2318 if (tqpair->cpda) { 2319 alignment = (tqpair->cpda + 1) << 2; 2320 if (alignment > plen) { 2321 rsp_pdu->padding_len = alignment - plen; 2322 pdo = plen = alignment; 2323 } 2324 } 2325 2326 c2h_data->common.pdo = pdo; 2327 plen += c2h_data->datal; 2328 if (tqpair->host_ddgst_enable) { 2329 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 2330 plen += SPDK_NVME_TCP_DIGEST_LEN; 2331 } 2332 2333 c2h_data->common.plen = plen; 2334 2335 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2336 rsp_pdu->dif_ctx = &tcp_req->dif_ctx; 2337 } 2338 2339 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2340 c2h_data->datao, c2h_data->datal); 2341 2342 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2343 rc = nvmf_tcp_pdu_verify_dif(rsp_pdu, rsp_pdu->dif_ctx); 2344 if (rc != 0) { 2345 /* Data digest error detected by the NVMe/TCP target is treated as non-fatal 2346 * transport error because the cause will be outside the NVMe/TCP target. 2347 * 2348 * On the other hand, treat DIF check error as fatal transport error here 2349 * here because the error is caused by the target itself. Fatal NVMe/TCP 2350 * transport error is handled by terminating the connection. 2351 */ 2352 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 2353 return; 2354 } 2355 } 2356 2357 tcp_req->c2h_data_offset += c2h_data->datal; 2358 if (tcp_req->c2h_data_offset == tcp_req->req.length) { 2359 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair); 2360 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 2361 if (tqpair->qpair.transport->opts.c2h_success) { 2362 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 2363 } 2364 TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link); 2365 } 2366 2367 tqpair->c2h_data_pdu_cnt += 1; 2368 spdk_nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, spdk_nvmf_tcp_pdu_c2h_data_complete, tcp_req); 2369 } 2370 2371 static int 2372 spdk_nvmf_tcp_calc_c2h_data_pdu_num(struct spdk_nvmf_tcp_req *tcp_req) 2373 { 2374 return (tcp_req->req.length + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) / 2375 NVMF_TCP_PDU_MAX_C2H_DATA_SIZE; 2376 } 2377 2378 static void 2379 spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair) 2380 { 2381 struct spdk_nvmf_tcp_req *tcp_req; 2382 2383 while (!TAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) && 2384 (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) { 2385 tcp_req = TAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req); 2386 spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2387 } 2388 } 2389 2390 static void 2391 spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req, 2392 struct spdk_nvmf_tcp_qpair *tqpair) 2393 { 2394 tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req); 2395 2396 assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM); 2397 2398 TAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link); 2399 spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair); 2400 } 2401 2402 static int 2403 request_transfer_out(struct spdk_nvmf_request *req) 2404 { 2405 struct spdk_nvmf_tcp_req *tcp_req; 2406 struct spdk_nvmf_qpair *qpair; 2407 struct spdk_nvmf_tcp_qpair *tqpair; 2408 struct spdk_nvme_cpl *rsp; 2409 2410 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 2411 2412 qpair = req->qpair; 2413 rsp = &req->rsp->nvme_cpl; 2414 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2415 2416 /* Advance our sq_head pointer */ 2417 if (qpair->sq_head == qpair->sq_head_max) { 2418 qpair->sq_head = 0; 2419 } else { 2420 qpair->sq_head++; 2421 } 2422 rsp->sqhd = qpair->sq_head; 2423 2424 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2425 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 2426 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && 2427 req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 2428 spdk_nvmf_tcp_queue_c2h_data(tcp_req, tqpair); 2429 } else { 2430 spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2431 } 2432 2433 return 0; 2434 } 2435 2436 static void 2437 spdk_nvmf_tcp_pdu_set_buf_from_req(struct spdk_nvmf_tcp_qpair *tqpair, 2438 struct spdk_nvmf_tcp_req *tcp_req) 2439 { 2440 struct nvme_tcp_pdu *pdu; 2441 2442 if (tcp_req->data_from_pool) { 2443 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Will send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2444 tcp_req->next_expected_r2t_offset = 0; 2445 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2446 spdk_nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2447 } else { 2448 pdu = &tqpair->pdu_in_progress; 2449 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, 2450 tqpair); 2451 /* No need to send r2t, contained in the capsuled data */ 2452 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2453 0, tcp_req->req.length); 2454 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2455 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2456 } 2457 } 2458 2459 static void 2460 spdk_nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair, 2461 struct spdk_nvmf_tcp_req *tcp_req) 2462 { 2463 struct nvme_tcp_pdu *pdu; 2464 uint32_t plen = 0; 2465 2466 pdu = &tqpair->pdu_in_progress; 2467 plen = pdu->hdr.common.hlen; 2468 2469 if (tqpair->host_hdgst_enable) { 2470 plen += SPDK_NVME_TCP_DIGEST_LEN; 2471 } 2472 2473 if (pdu->hdr.common.plen != plen) { 2474 tcp_req->has_incapsule_data = true; 2475 } 2476 } 2477 2478 static bool 2479 spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 2480 struct spdk_nvmf_tcp_req *tcp_req) 2481 { 2482 struct spdk_nvmf_tcp_qpair *tqpair; 2483 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; 2484 int rc; 2485 enum spdk_nvmf_tcp_req_state prev_state; 2486 bool progress = false; 2487 struct spdk_nvmf_transport_poll_group *group; 2488 2489 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2490 group = &tqpair->group->group; 2491 assert(tcp_req->state != TCP_REQUEST_STATE_FREE); 2492 2493 /* The loop here is to allow for several back-to-back state changes. */ 2494 do { 2495 prev_state = tcp_req->state; 2496 2497 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, 2498 tqpair); 2499 2500 switch (tcp_req->state) { 2501 case TCP_REQUEST_STATE_FREE: 2502 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW 2503 * to escape this state. */ 2504 break; 2505 case TCP_REQUEST_STATE_NEW: 2506 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0); 2507 2508 /* copy the cmd from the receive pdu */ 2509 tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe; 2510 2511 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->dif_ctx))) { 2512 tcp_req->dif_insert_or_strip = true; 2513 tqpair->pdu_in_progress.dif_ctx = &tcp_req->dif_ctx; 2514 } 2515 2516 /* The next state transition depends on the data transfer needs of this request. */ 2517 tcp_req->req.xfer = spdk_nvmf_tcp_req_get_xfer(tcp_req); 2518 2519 /* If no data to transfer, ready to execute. */ 2520 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { 2521 /* Reset the tqpair receving pdu state */ 2522 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2523 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2524 break; 2525 } 2526 2527 spdk_nvmf_tcp_set_incapsule_data(tqpair, tcp_req); 2528 2529 if (!tcp_req->has_incapsule_data) { 2530 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2531 } 2532 2533 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); 2534 TAILQ_INSERT_TAIL(&tqpair->group->pending_data_buf_queue, tcp_req, link); 2535 break; 2536 case TCP_REQUEST_STATE_NEED_BUFFER: 2537 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0); 2538 2539 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); 2540 2541 if (!tcp_req->has_incapsule_data && 2542 (tcp_req != TAILQ_FIRST(&tqpair->group->pending_data_buf_queue))) { 2543 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, 2544 "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n", 2545 tcp_req, tqpair); 2546 /* This request needs to wait in line to obtain a buffer */ 2547 break; 2548 } 2549 2550 /* Try to get a data buffer */ 2551 rc = spdk_nvmf_tcp_req_parse_sgl(ttransport, tcp_req); 2552 if (rc < 0) { 2553 TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link); 2554 rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 2555 /* Reset the tqpair receving pdu state */ 2556 spdk_nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 2557 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2558 break; 2559 } 2560 2561 if (!tcp_req->req.data) { 2562 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)", 2563 tcp_req, tqpair); 2564 /* No buffers available. */ 2565 break; 2566 } 2567 2568 TAILQ_REMOVE(&tqpair->group->pending_data_buf_queue, tcp_req, link); 2569 2570 /* If data is transferring from host to controller, we need to do a transfer from the host. */ 2571 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2572 spdk_nvmf_tcp_pdu_set_buf_from_req(tqpair, tcp_req); 2573 break; 2574 } 2575 2576 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2577 break; 2578 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 2579 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, 2580 (uintptr_t)tcp_req, 0); 2581 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE 2582 * to escape this state. */ 2583 break; 2584 case TCP_REQUEST_STATE_READY_TO_EXECUTE: 2585 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0); 2586 2587 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2588 assert(tcp_req->elba_length >= tcp_req->req.length); 2589 tcp_req->orig_length = tcp_req->req.length; 2590 tcp_req->req.length = tcp_req->elba_length; 2591 } 2592 2593 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING); 2594 spdk_nvmf_request_exec(&tcp_req->req); 2595 break; 2596 case TCP_REQUEST_STATE_EXECUTING: 2597 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0); 2598 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 2599 * to escape this state. */ 2600 break; 2601 case TCP_REQUEST_STATE_EXECUTED: 2602 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0); 2603 2604 if (spdk_unlikely(tcp_req->dif_insert_or_strip)) { 2605 tcp_req->req.length = tcp_req->orig_length; 2606 } 2607 2608 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2609 break; 2610 case TCP_REQUEST_STATE_READY_TO_COMPLETE: 2611 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0); 2612 rc = request_transfer_out(&tcp_req->req); 2613 assert(rc == 0); /* No good way to handle this currently */ 2614 break; 2615 case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: 2616 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, 2617 (uintptr_t)tcp_req, 2618 0); 2619 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 2620 * to escape this state. */ 2621 break; 2622 case TCP_REQUEST_STATE_COMPLETED: 2623 spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0); 2624 if (tcp_req->data_from_pool) { 2625 spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport); 2626 } 2627 tcp_req->req.length = 0; 2628 tcp_req->req.iovcnt = 0; 2629 tcp_req->req.data = NULL; 2630 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE); 2631 break; 2632 case TCP_REQUEST_NUM_STATES: 2633 default: 2634 assert(0); 2635 break; 2636 } 2637 2638 if (tcp_req->state != prev_state) { 2639 progress = true; 2640 } 2641 } while (tcp_req->state != prev_state); 2642 2643 return progress; 2644 } 2645 2646 static void 2647 spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport, 2648 struct spdk_nvmf_tcp_qpair *tqpair) 2649 { 2650 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 2651 2652 /* Tqpair is not in a good state, so return it */ 2653 if (spdk_unlikely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR)) { 2654 return; 2655 } 2656 2657 2658 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->group->pending_data_buf_queue, link, req_tmp) { 2659 if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) { 2660 break; 2661 } 2662 } 2663 } 2664 2665 static void 2666 spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) 2667 { 2668 struct spdk_nvmf_tcp_qpair *tqpair = arg; 2669 struct spdk_nvmf_tcp_transport *ttransport; 2670 int rc; 2671 2672 assert(tqpair != NULL); 2673 2674 ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 2675 spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair); 2676 rc = spdk_nvmf_tcp_sock_process(tqpair); 2677 2678 /* check the following two factors: 2679 * rc: The socket is closed 2680 * State of tqpair: The tqpair is in EXITING state due to internal error 2681 */ 2682 if ((rc < 0) || (tqpair->state == NVME_TCP_QPAIR_STATE_EXITING)) { 2683 tqpair->state = NVME_TCP_QPAIR_STATE_EXITED; 2684 spdk_nvmf_tcp_qpair_flush_pdus(tqpair); 2685 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconect the tqpair=%p\n", tqpair); 2686 spdk_poller_unregister(&tqpair->timeout_poller); 2687 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 2688 } 2689 } 2690 2691 static int 2692 spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2693 struct spdk_nvmf_qpair *qpair) 2694 { 2695 struct spdk_nvmf_tcp_poll_group *tgroup; 2696 struct spdk_nvmf_tcp_qpair *tqpair; 2697 int rc; 2698 2699 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2700 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2701 2702 rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock, 2703 spdk_nvmf_tcp_sock_cb, tqpair); 2704 if (rc != 0) { 2705 SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n", 2706 spdk_strerror(errno), errno); 2707 spdk_nvmf_tcp_qpair_destroy(tqpair); 2708 return -1; 2709 } 2710 2711 rc = spdk_nvmf_tcp_qpair_sock_init(tqpair); 2712 if (rc != 0) { 2713 SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair); 2714 spdk_nvmf_tcp_qpair_destroy(tqpair); 2715 return -1; 2716 } 2717 2718 rc = spdk_nvmf_tcp_qpair_init(&tqpair->qpair); 2719 if (rc < 0) { 2720 SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair); 2721 spdk_nvmf_tcp_qpair_destroy(tqpair); 2722 return -1; 2723 } 2724 2725 rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1); 2726 if (rc < 0) { 2727 SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair); 2728 spdk_nvmf_tcp_qpair_destroy(tqpair); 2729 return -1; 2730 } 2731 2732 tqpair->group = tgroup; 2733 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 2734 TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link); 2735 2736 return 0; 2737 } 2738 2739 static int 2740 spdk_nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 2741 struct spdk_nvmf_qpair *qpair) 2742 { 2743 struct spdk_nvmf_tcp_poll_group *tgroup; 2744 struct spdk_nvmf_tcp_qpair *tqpair; 2745 int rc; 2746 2747 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2748 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2749 2750 assert(tqpair->group == tgroup); 2751 2752 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup); 2753 TAILQ_REMOVE(&tgroup->qpairs, tqpair, link); 2754 rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock); 2755 if (rc != 0) { 2756 SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n", 2757 spdk_strerror(errno), errno); 2758 } 2759 2760 return rc; 2761 } 2762 2763 static int 2764 spdk_nvmf_tcp_req_complete(struct spdk_nvmf_request *req) 2765 { 2766 struct spdk_nvmf_tcp_transport *ttransport; 2767 struct spdk_nvmf_tcp_req *tcp_req; 2768 2769 ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport); 2770 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2771 2772 spdk_nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 2773 spdk_nvmf_tcp_req_process(ttransport, tcp_req); 2774 2775 return 0; 2776 } 2777 2778 static void 2779 spdk_nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair) 2780 { 2781 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n"); 2782 2783 spdk_nvmf_tcp_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair)); 2784 } 2785 2786 static int 2787 spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 2788 { 2789 struct spdk_nvmf_tcp_poll_group *tgroup; 2790 int rc; 2791 2792 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2793 2794 if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) { 2795 return 0; 2796 } 2797 2798 rc = spdk_sock_group_poll(tgroup->sock_group); 2799 if (rc < 0) { 2800 SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group); 2801 return rc; 2802 } 2803 2804 return 0; 2805 } 2806 2807 static int 2808 spdk_nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair, 2809 struct spdk_nvme_transport_id *trid, bool peer) 2810 { 2811 struct spdk_nvmf_tcp_qpair *tqpair; 2812 uint16_t port; 2813 2814 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2815 trid->trtype = SPDK_NVME_TRANSPORT_TCP; 2816 2817 if (peer) { 2818 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr); 2819 port = tqpair->initiator_port; 2820 } else { 2821 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr); 2822 port = tqpair->target_port; 2823 } 2824 2825 if (spdk_sock_is_ipv4(tqpair->sock)) { 2826 trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; 2827 } else if (spdk_sock_is_ipv4(tqpair->sock)) { 2828 trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; 2829 } else { 2830 return -1; 2831 } 2832 2833 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port); 2834 return 0; 2835 } 2836 2837 static int 2838 spdk_nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 2839 struct spdk_nvme_transport_id *trid) 2840 { 2841 return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0); 2842 } 2843 2844 static int 2845 spdk_nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 2846 struct spdk_nvme_transport_id *trid) 2847 { 2848 return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 1); 2849 } 2850 2851 static int 2852 spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 2853 struct spdk_nvme_transport_id *trid) 2854 { 2855 return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0); 2856 } 2857 2858 static int 2859 spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair) 2860 { 2861 struct spdk_nvmf_tcp_qpair *tqpair; 2862 int rc; 2863 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2864 2865 rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max); 2866 if (!rc) { 2867 tqpair->max_queue_depth += tqpair->qpair.sq_head_max; 2868 tqpair->free_pdu_num += tqpair->qpair.sq_head_max; 2869 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max; 2870 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n", 2871 tqpair->max_queue_depth, tqpair); 2872 } 2873 2874 return rc; 2875 2876 } 2877 2878 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128 2879 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128 2880 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128 2881 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 2882 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072 2883 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072 2884 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511 2885 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32 2886 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true 2887 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false 2888 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0 2889 2890 static void 2891 spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts) 2892 { 2893 opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH; 2894 opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR; 2895 opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE; 2896 opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE; 2897 opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE; 2898 opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH; 2899 opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS; 2900 opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE; 2901 opts->c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION; 2902 opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP; 2903 opts->sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY; 2904 } 2905 2906 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = { 2907 .type = SPDK_NVME_TRANSPORT_TCP, 2908 .opts_init = spdk_nvmf_tcp_opts_init, 2909 .create = spdk_nvmf_tcp_create, 2910 .destroy = spdk_nvmf_tcp_destroy, 2911 2912 .listen = spdk_nvmf_tcp_listen, 2913 .stop_listen = spdk_nvmf_tcp_stop_listen, 2914 .accept = spdk_nvmf_tcp_accept, 2915 2916 .listener_discover = spdk_nvmf_tcp_discover, 2917 2918 .poll_group_create = spdk_nvmf_tcp_poll_group_create, 2919 .get_optimal_poll_group = spdk_nvmf_tcp_get_optimal_poll_group, 2920 .poll_group_destroy = spdk_nvmf_tcp_poll_group_destroy, 2921 .poll_group_add = spdk_nvmf_tcp_poll_group_add, 2922 .poll_group_remove = spdk_nvmf_tcp_poll_group_remove, 2923 .poll_group_poll = spdk_nvmf_tcp_poll_group_poll, 2924 2925 .req_free = spdk_nvmf_tcp_req_free, 2926 .req_complete = spdk_nvmf_tcp_req_complete, 2927 2928 .qpair_fini = spdk_nvmf_tcp_close_qpair, 2929 .qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid, 2930 .qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid, 2931 .qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid, 2932 .qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size, 2933 }; 2934 2935 SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP) 2936