1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 #include "spdk/crc32.h" 36 #include "spdk/endian.h" 37 #include "spdk/assert.h" 38 #include "spdk/thread.h" 39 #include "spdk/nvmf_transport.h" 40 #include "spdk/string.h" 41 #include "spdk/trace.h" 42 #include "spdk/util.h" 43 #include "spdk/log.h" 44 45 #include "spdk_internal/assert.h" 46 #include "spdk_internal/nvme_tcp.h" 47 #include "spdk_internal/sock.h" 48 49 #include "nvmf_internal.h" 50 51 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16 52 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 16 53 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0 54 #define SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM 32 55 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true 56 57 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp; 58 59 /* spdk nvmf related structure */ 60 enum spdk_nvmf_tcp_req_state { 61 62 /* The request is not currently in use */ 63 TCP_REQUEST_STATE_FREE = 0, 64 65 /* Initial state when request first received */ 66 TCP_REQUEST_STATE_NEW, 67 68 /* The request is queued until a data buffer is available. */ 69 TCP_REQUEST_STATE_NEED_BUFFER, 70 71 /* The request is currently transferring data from the host to the controller. */ 72 TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 73 74 /* The request is waiting for the R2T send acknowledgement. */ 75 TCP_REQUEST_STATE_AWAITING_R2T_ACK, 76 77 /* The request is ready to execute at the block device */ 78 TCP_REQUEST_STATE_READY_TO_EXECUTE, 79 80 /* The request is currently executing at the block device */ 81 TCP_REQUEST_STATE_EXECUTING, 82 83 /* The request finished executing at the block device */ 84 TCP_REQUEST_STATE_EXECUTED, 85 86 /* The request is ready to send a completion */ 87 TCP_REQUEST_STATE_READY_TO_COMPLETE, 88 89 /* The request is currently transferring final pdus from the controller to the host. */ 90 TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 91 92 /* The request completed and can be marked free. */ 93 TCP_REQUEST_STATE_COMPLETED, 94 95 /* Terminator */ 96 TCP_REQUEST_NUM_STATES, 97 }; 98 99 static const char *spdk_nvmf_tcp_term_req_fes_str[] = { 100 "Invalid PDU Header Field", 101 "PDU Sequence Error", 102 "Header Digiest Error", 103 "Data Transfer Out of Range", 104 "R2T Limit Exceeded", 105 "Unsupported parameter", 106 }; 107 108 #define OBJECT_NVMF_TCP_IO 0x80 109 110 #define TRACE_GROUP_NVMF_TCP 0x5 111 #define TRACE_TCP_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0) 112 #define TRACE_TCP_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1) 113 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2) 114 #define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3) 115 #define TRACE_TCP_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4) 116 #define TRACE_TCP_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5) 117 #define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6) 118 #define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7) 119 #define TRACE_TCP_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8) 120 #define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9) 121 #define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA) 122 #define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB) 123 #define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC) 124 125 SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP) 126 { 127 spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r'); 128 spdk_trace_register_description("TCP_REQ_NEW", 129 TRACE_TCP_REQUEST_STATE_NEW, 130 OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, ""); 131 spdk_trace_register_description("TCP_REQ_NEED_BUFFER", 132 TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 133 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 134 spdk_trace_register_description("TCP_REQ_TX_H_TO_C", 135 TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 136 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 137 spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE", 138 TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 139 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 140 spdk_trace_register_description("TCP_REQ_EXECUTING", 141 TRACE_TCP_REQUEST_STATE_EXECUTING, 142 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 143 spdk_trace_register_description("TCP_REQ_EXECUTED", 144 TRACE_TCP_REQUEST_STATE_EXECUTED, 145 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 146 spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE", 147 TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 148 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 149 spdk_trace_register_description("TCP_REQ_TRANSFER_C2H", 150 TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 151 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 152 spdk_trace_register_description("TCP_REQ_COMPLETED", 153 TRACE_TCP_REQUEST_STATE_COMPLETED, 154 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 155 spdk_trace_register_description("TCP_WRITE_START", 156 TRACE_TCP_FLUSH_WRITEBUF_START, 157 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 158 spdk_trace_register_description("TCP_WRITE_DONE", 159 TRACE_TCP_FLUSH_WRITEBUF_DONE, 160 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 161 spdk_trace_register_description("TCP_READ_DONE", 162 TRACE_TCP_READ_FROM_SOCKET_DONE, 163 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 164 spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK", 165 TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 166 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, ""); 167 } 168 169 struct spdk_nvmf_tcp_req { 170 struct spdk_nvmf_request req; 171 struct spdk_nvme_cpl rsp; 172 struct spdk_nvme_cmd cmd; 173 174 /* A PDU that can be used for sending responses. This is 175 * not the incoming PDU! */ 176 struct nvme_tcp_pdu *pdu; 177 178 /* In-capsule data buffer */ 179 uint8_t *buf; 180 /* 181 * The PDU for a request may be used multiple times in serial over 182 * the request's lifetime. For example, first to send an R2T, then 183 * to send a completion. To catch mistakes where the PDU is used 184 * twice at the same time, add a debug flag here for init/fini. 185 */ 186 bool pdu_in_use; 187 bool has_incapsule_data; 188 189 /* transfer_tag */ 190 uint16_t ttag; 191 192 enum spdk_nvmf_tcp_req_state state; 193 194 /* 195 * h2c_offset is used when we receive the h2c_data PDU. 196 */ 197 uint32_t h2c_offset; 198 199 STAILQ_ENTRY(spdk_nvmf_tcp_req) link; 200 TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link; 201 }; 202 203 struct spdk_nvmf_tcp_qpair { 204 struct spdk_nvmf_qpair qpair; 205 struct spdk_nvmf_tcp_poll_group *group; 206 struct spdk_sock *sock; 207 208 enum nvme_tcp_pdu_recv_state recv_state; 209 enum nvme_tcp_qpair_state state; 210 211 /* PDU being actively received */ 212 struct nvme_tcp_pdu pdu_in_progress; 213 214 /* Queues to track the requests in all states */ 215 TAILQ_HEAD(, spdk_nvmf_tcp_req) state_queue[TCP_REQUEST_NUM_STATES]; 216 /* Number of requests in each state */ 217 uint32_t state_cntr[TCP_REQUEST_NUM_STATES]; 218 219 uint8_t cpda; 220 221 bool host_hdgst_enable; 222 bool host_ddgst_enable; 223 224 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 225 226 /* This is a spare PDU used for sending special management 227 * operations. Primarily, this is used for the initial 228 * connection response and c2h termination request. */ 229 struct nvme_tcp_pdu *mgmt_pdu; 230 231 /* Arrays of in-capsule buffers, requests, and pdus. 232 * Each array is 'resource_count' number of elements */ 233 void *bufs; 234 struct spdk_nvmf_tcp_req *reqs; 235 struct nvme_tcp_pdu *pdus; 236 uint32_t resource_count; 237 uint32_t recv_buf_size; 238 239 struct spdk_nvmf_tcp_port *port; 240 241 /* IP address */ 242 char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 243 char target_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 244 245 /* IP port */ 246 uint16_t initiator_port; 247 uint16_t target_port; 248 249 /* Timer used to destroy qpair after detecting transport error issue if initiator does 250 * not close the connection. 251 */ 252 struct spdk_poller *timeout_poller; 253 254 TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; 255 }; 256 257 struct spdk_nvmf_tcp_control_msg { 258 STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link; 259 }; 260 261 struct spdk_nvmf_tcp_control_msg_list { 262 void *msg_buf; 263 STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs; 264 }; 265 266 struct spdk_nvmf_tcp_poll_group { 267 struct spdk_nvmf_transport_poll_group group; 268 struct spdk_sock_group *sock_group; 269 270 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; 271 272 struct spdk_nvmf_tcp_control_msg_list *control_msg_list; 273 }; 274 275 struct spdk_nvmf_tcp_port { 276 const struct spdk_nvme_transport_id *trid; 277 struct spdk_sock *listen_sock; 278 TAILQ_ENTRY(spdk_nvmf_tcp_port) link; 279 }; 280 281 struct tcp_transport_opts { 282 bool c2h_success; 283 uint16_t control_msg_num; 284 uint32_t sock_priority; 285 }; 286 287 struct spdk_nvmf_tcp_transport { 288 struct spdk_nvmf_transport transport; 289 struct tcp_transport_opts tcp_opts; 290 291 pthread_mutex_t lock; 292 293 TAILQ_HEAD(, spdk_nvmf_tcp_port) ports; 294 }; 295 296 static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = { 297 { 298 "c2h_success", offsetof(struct tcp_transport_opts, c2h_success), 299 spdk_json_decode_bool, true 300 }, 301 { 302 "control_msg_num", offsetof(struct tcp_transport_opts, control_msg_num), 303 spdk_json_decode_uint16, true 304 }, 305 { 306 "sock_priority", offsetof(struct tcp_transport_opts, sock_priority), 307 spdk_json_decode_uint32, true 308 }, 309 }; 310 311 static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 312 struct spdk_nvmf_tcp_req *tcp_req); 313 static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); 314 315 static void 316 nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, 317 enum spdk_nvmf_tcp_req_state state) 318 { 319 struct spdk_nvmf_qpair *qpair; 320 struct spdk_nvmf_tcp_qpair *tqpair; 321 322 qpair = tcp_req->req.qpair; 323 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 324 325 TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 326 assert(tqpair->state_cntr[tcp_req->state] > 0); 327 tqpair->state_cntr[tcp_req->state]--; 328 329 TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link); 330 tqpair->state_cntr[state]++; 331 332 tcp_req->state = state; 333 } 334 335 static inline struct nvme_tcp_pdu * 336 nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req) 337 { 338 assert(tcp_req->pdu_in_use == false); 339 tcp_req->pdu_in_use = true; 340 341 memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu)); 342 tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 343 344 return tcp_req->pdu; 345 } 346 347 static inline void 348 nvmf_tcp_req_pdu_fini(struct spdk_nvmf_tcp_req *tcp_req) 349 { 350 tcp_req->pdu_in_use = false; 351 } 352 353 static struct spdk_nvmf_tcp_req * 354 nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair) 355 { 356 struct spdk_nvmf_tcp_req *tcp_req; 357 358 tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]); 359 if (!tcp_req) { 360 return NULL; 361 } 362 363 memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp)); 364 tcp_req->h2c_offset = 0; 365 tcp_req->has_incapsule_data = false; 366 tcp_req->req.dif.dif_insert_or_strip = false; 367 368 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 369 return tcp_req; 370 } 371 372 static void 373 nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req) 374 { 375 struct spdk_nvmf_tcp_transport *ttransport; 376 377 assert(tcp_req != NULL); 378 379 SPDK_DEBUGLOG(nvmf_tcp, "tcp_req=%p will be freed\n", tcp_req); 380 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 381 struct spdk_nvmf_tcp_transport, transport); 382 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 383 nvmf_tcp_req_process(ttransport, tcp_req); 384 } 385 386 static int 387 nvmf_tcp_req_free(struct spdk_nvmf_request *req) 388 { 389 struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 390 391 nvmf_tcp_request_free(tcp_req); 392 393 return 0; 394 } 395 396 static void 397 nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair, 398 enum spdk_nvmf_tcp_req_state state) 399 { 400 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 401 402 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) { 403 nvmf_tcp_request_free(tcp_req); 404 } 405 } 406 407 static void 408 nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) 409 { 410 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 411 412 assert(TAILQ_EMPTY(&tqpair->send_queue)); 413 414 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 415 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW); 416 417 /* Wipe the requests waiting for buffer from the global list */ 418 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link, 419 req_tmp) { 420 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req, 421 spdk_nvmf_request, buf_link); 422 } 423 424 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); 425 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); 426 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 427 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 428 } 429 430 static void 431 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair) 432 { 433 int i; 434 struct spdk_nvmf_tcp_req *tcp_req; 435 436 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid); 437 for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) { 438 SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]); 439 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) { 440 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool); 441 SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode); 442 } 443 } 444 } 445 446 static void 447 nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair) 448 { 449 int err = 0; 450 451 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 452 453 err = spdk_sock_close(&tqpair->sock); 454 assert(err == 0); 455 nvmf_tcp_cleanup_all_states(tqpair); 456 457 if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) { 458 SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair, 459 tqpair->state_cntr[TCP_REQUEST_STATE_FREE], 460 tqpair->resource_count); 461 err++; 462 } 463 464 if (err > 0) { 465 nvmf_tcp_dump_qpair_req_contents(tqpair); 466 } 467 468 spdk_dma_free(tqpair->pdus); 469 free(tqpair->reqs); 470 spdk_free(tqpair->bufs); 471 free(tqpair); 472 SPDK_DEBUGLOG(nvmf_tcp, "Leave\n"); 473 } 474 475 static void 476 nvmf_tcp_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w) 477 { 478 struct spdk_nvmf_tcp_transport *ttransport; 479 assert(w != NULL); 480 481 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 482 spdk_json_write_named_bool(w, "c2h_success", ttransport->tcp_opts.c2h_success); 483 spdk_json_write_named_uint32(w, "sock_priority", ttransport->tcp_opts.sock_priority); 484 } 485 486 static int 487 nvmf_tcp_destroy(struct spdk_nvmf_transport *transport, 488 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 489 { 490 struct spdk_nvmf_tcp_transport *ttransport; 491 492 assert(transport != NULL); 493 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 494 495 pthread_mutex_destroy(&ttransport->lock); 496 free(ttransport); 497 498 if (cb_fn) { 499 cb_fn(cb_arg); 500 } 501 return 0; 502 } 503 504 static struct spdk_nvmf_transport * 505 nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts) 506 { 507 struct spdk_nvmf_tcp_transport *ttransport; 508 uint32_t sge_count; 509 uint32_t min_shared_buffers; 510 511 ttransport = calloc(1, sizeof(*ttransport)); 512 if (!ttransport) { 513 return NULL; 514 } 515 516 TAILQ_INIT(&ttransport->ports); 517 518 ttransport->transport.ops = &spdk_nvmf_transport_tcp; 519 520 ttransport->tcp_opts.c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION; 521 ttransport->tcp_opts.sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY; 522 ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM; 523 if (opts->transport_specific != NULL && 524 spdk_json_decode_object_relaxed(opts->transport_specific, tcp_transport_opts_decoder, 525 SPDK_COUNTOF(tcp_transport_opts_decoder), 526 &ttransport->tcp_opts)) { 527 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 528 free(ttransport); 529 return NULL; 530 } 531 532 SPDK_NOTICELOG("*** TCP Transport Init ***\n"); 533 534 SPDK_INFOLOG(nvmf_tcp, "*** TCP Transport Init ***\n" 535 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" 536 " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" 537 " in_capsule_data_size=%d, max_aq_depth=%d\n" 538 " num_shared_buffers=%d, c2h_success=%d,\n" 539 " dif_insert_or_strip=%d, sock_priority=%d\n" 540 " abort_timeout_sec=%d, control_msg_num=%hu\n", 541 opts->max_queue_depth, 542 opts->max_io_size, 543 opts->max_qpairs_per_ctrlr - 1, 544 opts->io_unit_size, 545 opts->in_capsule_data_size, 546 opts->max_aq_depth, 547 opts->num_shared_buffers, 548 ttransport->tcp_opts.c2h_success, 549 opts->dif_insert_or_strip, 550 ttransport->tcp_opts.sock_priority, 551 opts->abort_timeout_sec, 552 ttransport->tcp_opts.control_msg_num); 553 554 if (ttransport->tcp_opts.sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) { 555 SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n" 556 "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n", 557 ttransport->tcp_opts.sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY); 558 free(ttransport); 559 return NULL; 560 } 561 562 if (ttransport->tcp_opts.control_msg_num == 0 && 563 opts->in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { 564 SPDK_WARNLOG("TCP param control_msg_num can't be 0 if ICD is less than %u bytes. Using default value %u\n", 565 SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM); 566 ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM; 567 } 568 569 /* I/O unit size cannot be larger than max I/O size */ 570 if (opts->io_unit_size > opts->max_io_size) { 571 opts->io_unit_size = opts->max_io_size; 572 } 573 574 sge_count = opts->max_io_size / opts->io_unit_size; 575 if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) { 576 SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); 577 free(ttransport); 578 return NULL; 579 } 580 581 min_shared_buffers = spdk_env_get_core_count() * opts->buf_cache_size; 582 if (min_shared_buffers > opts->num_shared_buffers) { 583 SPDK_ERRLOG("There are not enough buffers to satisfy" 584 "per-poll group caches for each thread. (%" PRIu32 ")" 585 "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); 586 SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); 587 nvmf_tcp_destroy(&ttransport->transport, NULL, NULL); 588 return NULL; 589 } 590 591 pthread_mutex_init(&ttransport->lock, NULL); 592 593 return &ttransport->transport; 594 } 595 596 static int 597 nvmf_tcp_trsvcid_to_int(const char *trsvcid) 598 { 599 unsigned long long ull; 600 char *end = NULL; 601 602 ull = strtoull(trsvcid, &end, 10); 603 if (end == NULL || end == trsvcid || *end != '\0') { 604 return -1; 605 } 606 607 /* Valid TCP/IP port numbers are in [0, 65535] */ 608 if (ull > 65535) { 609 return -1; 610 } 611 612 return (int)ull; 613 } 614 615 /** 616 * Canonicalize a listen address trid. 617 */ 618 static int 619 nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid, 620 const struct spdk_nvme_transport_id *trid) 621 { 622 int trsvcid_int; 623 624 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); 625 if (trsvcid_int < 0) { 626 return -EINVAL; 627 } 628 629 memset(canon_trid, 0, sizeof(*canon_trid)); 630 spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP); 631 canon_trid->adrfam = trid->adrfam; 632 snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr); 633 snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int); 634 635 return 0; 636 } 637 638 /** 639 * Find an existing listening port. 640 * 641 * Caller must hold ttransport->lock. 642 */ 643 static struct spdk_nvmf_tcp_port * 644 nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport, 645 const struct spdk_nvme_transport_id *trid) 646 { 647 struct spdk_nvme_transport_id canon_trid; 648 struct spdk_nvmf_tcp_port *port; 649 650 if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) { 651 return NULL; 652 } 653 654 TAILQ_FOREACH(port, &ttransport->ports, link) { 655 if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) { 656 return port; 657 } 658 } 659 660 return NULL; 661 } 662 663 static int 664 nvmf_tcp_listen(struct spdk_nvmf_transport *transport, 665 const struct spdk_nvme_transport_id *trid) 666 { 667 struct spdk_nvmf_tcp_transport *ttransport; 668 struct spdk_nvmf_tcp_port *port; 669 int trsvcid_int; 670 uint8_t adrfam; 671 struct spdk_sock_opts opts; 672 673 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 674 675 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); 676 if (trsvcid_int < 0) { 677 SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid); 678 return -EINVAL; 679 } 680 681 pthread_mutex_lock(&ttransport->lock); 682 port = calloc(1, sizeof(*port)); 683 if (!port) { 684 SPDK_ERRLOG("Port allocation failed\n"); 685 pthread_mutex_unlock(&ttransport->lock); 686 return -ENOMEM; 687 } 688 689 port->trid = trid; 690 opts.opts_size = sizeof(opts); 691 spdk_sock_get_default_opts(&opts); 692 opts.priority = ttransport->tcp_opts.sock_priority; 693 port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int, 694 NULL, &opts); 695 if (port->listen_sock == NULL) { 696 SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n", 697 trid->traddr, trsvcid_int, 698 spdk_strerror(errno), errno); 699 free(port); 700 pthread_mutex_unlock(&ttransport->lock); 701 return -errno; 702 } 703 704 if (spdk_sock_is_ipv4(port->listen_sock)) { 705 adrfam = SPDK_NVMF_ADRFAM_IPV4; 706 } else if (spdk_sock_is_ipv6(port->listen_sock)) { 707 adrfam = SPDK_NVMF_ADRFAM_IPV6; 708 } else { 709 SPDK_ERRLOG("Unhandled socket type\n"); 710 adrfam = 0; 711 } 712 713 if (adrfam != trid->adrfam) { 714 SPDK_ERRLOG("Socket address family mismatch\n"); 715 spdk_sock_close(&port->listen_sock); 716 free(port); 717 pthread_mutex_unlock(&ttransport->lock); 718 return -EINVAL; 719 } 720 721 SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n", 722 trid->traddr, trid->trsvcid); 723 724 TAILQ_INSERT_TAIL(&ttransport->ports, port, link); 725 pthread_mutex_unlock(&ttransport->lock); 726 return 0; 727 } 728 729 static void 730 nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport, 731 const struct spdk_nvme_transport_id *trid) 732 { 733 struct spdk_nvmf_tcp_transport *ttransport; 734 struct spdk_nvmf_tcp_port *port; 735 736 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 737 738 SPDK_DEBUGLOG(nvmf_tcp, "Removing listen address %s port %s\n", 739 trid->traddr, trid->trsvcid); 740 741 pthread_mutex_lock(&ttransport->lock); 742 port = nvmf_tcp_find_port(ttransport, trid); 743 if (port) { 744 TAILQ_REMOVE(&ttransport->ports, port, link); 745 spdk_sock_close(&port->listen_sock); 746 free(port); 747 } 748 749 pthread_mutex_unlock(&ttransport->lock); 750 } 751 752 static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 753 enum nvme_tcp_pdu_recv_state state); 754 755 static void 756 nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair) 757 { 758 SPDK_DEBUGLOG(nvmf_tcp, "Disconnecting qpair %p\n", tqpair); 759 760 if (tqpair->state <= NVME_TCP_QPAIR_STATE_RUNNING) { 761 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 762 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 763 spdk_poller_unregister(&tqpair->timeout_poller); 764 765 /* This will end up calling nvmf_tcp_close_qpair */ 766 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 767 } 768 } 769 770 static void 771 _pdu_write_done(void *_pdu, int err) 772 { 773 struct nvme_tcp_pdu *pdu = _pdu; 774 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 775 776 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 777 778 if (err != 0) { 779 nvmf_tcp_qpair_disconnect(tqpair); 780 return; 781 } 782 783 assert(pdu->cb_fn != NULL); 784 pdu->cb_fn(pdu->cb_arg); 785 } 786 787 static void 788 nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 789 struct nvme_tcp_pdu *pdu, 790 nvme_tcp_qpair_xfer_complete_cb cb_fn, 791 void *cb_arg) 792 { 793 int hlen; 794 uint32_t crc32c; 795 uint32_t mapped_length = 0; 796 ssize_t rc; 797 798 assert(&tqpair->pdu_in_progress != pdu); 799 800 hlen = pdu->hdr.common.hlen; 801 802 /* Header Digest */ 803 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) { 804 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 805 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 806 } 807 808 /* Data Digest */ 809 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) { 810 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 811 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 812 } 813 814 pdu->cb_fn = cb_fn; 815 pdu->cb_arg = cb_arg; 816 817 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, 818 tqpair->host_hdgst_enable, tqpair->host_ddgst_enable, 819 &mapped_length); 820 pdu->sock_req.cb_fn = _pdu_write_done; 821 pdu->sock_req.cb_arg = pdu; 822 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 823 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || 824 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) { 825 rc = spdk_sock_writev(tqpair->sock, pdu->iov, pdu->sock_req.iovcnt); 826 if (rc == mapped_length) { 827 _pdu_write_done(pdu, 0); 828 } else { 829 SPDK_ERRLOG("IC_RESP or TERM_REQ could not write to socket.\n"); 830 _pdu_write_done(pdu, -1); 831 } 832 } else { 833 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 834 } 835 } 836 837 static int 838 nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair) 839 { 840 uint32_t i; 841 struct spdk_nvmf_transport_opts *opts; 842 uint32_t in_capsule_data_size; 843 844 opts = &tqpair->qpair.transport->opts; 845 846 in_capsule_data_size = opts->in_capsule_data_size; 847 if (opts->dif_insert_or_strip) { 848 in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size); 849 } 850 851 tqpair->resource_count = opts->max_queue_depth; 852 853 tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs)); 854 if (!tqpair->reqs) { 855 SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair); 856 return -1; 857 } 858 859 if (in_capsule_data_size) { 860 tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000, 861 NULL, SPDK_ENV_LCORE_ID_ANY, 862 SPDK_MALLOC_DMA); 863 if (!tqpair->bufs) { 864 SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair); 865 return -1; 866 } 867 } 868 869 /* Add addtional one member, which will be used for mgmt_pdu owned by the tqpair */ 870 tqpair->pdus = spdk_dma_malloc((tqpair->resource_count + 1) * sizeof(*tqpair->pdus), 0x1000, NULL); 871 if (!tqpair->pdus) { 872 SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair); 873 return -1; 874 } 875 876 for (i = 0; i < tqpair->resource_count; i++) { 877 struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i]; 878 879 tcp_req->ttag = i + 1; 880 tcp_req->req.qpair = &tqpair->qpair; 881 882 tcp_req->pdu = &tqpair->pdus[i]; 883 tcp_req->pdu->qpair = tqpair; 884 885 /* Set up memory to receive commands */ 886 if (tqpair->bufs) { 887 tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size)); 888 } 889 890 /* Set the cmdn and rsp */ 891 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; 892 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; 893 894 /* Initialize request state to FREE */ 895 tcp_req->state = TCP_REQUEST_STATE_FREE; 896 TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link); 897 tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++; 898 } 899 900 tqpair->mgmt_pdu = &tqpair->pdus[i]; 901 tqpair->mgmt_pdu->qpair = tqpair; 902 903 tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 * 904 SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 905 906 return 0; 907 } 908 909 static int 910 nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair) 911 { 912 struct spdk_nvmf_tcp_qpair *tqpair; 913 int i; 914 915 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 916 917 SPDK_DEBUGLOG(nvmf_tcp, "New TCP Connection: %p\n", qpair); 918 919 TAILQ_INIT(&tqpair->send_queue); 920 921 /* Initialise request state queues of the qpair */ 922 for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) { 923 TAILQ_INIT(&tqpair->state_queue[i]); 924 } 925 926 tqpair->host_hdgst_enable = true; 927 tqpair->host_ddgst_enable = true; 928 929 return 0; 930 } 931 932 static int 933 nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair) 934 { 935 int rc; 936 937 /* set low water mark */ 938 rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_common_pdu_hdr)); 939 if (rc != 0) { 940 SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n"); 941 return rc; 942 } 943 944 return 0; 945 } 946 947 static void 948 nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport, 949 struct spdk_nvmf_tcp_port *port, 950 struct spdk_sock *sock) 951 { 952 struct spdk_nvmf_tcp_qpair *tqpair; 953 int rc; 954 955 SPDK_DEBUGLOG(nvmf_tcp, "New connection accepted on %s port %s\n", 956 port->trid->traddr, port->trid->trsvcid); 957 958 tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair)); 959 if (tqpair == NULL) { 960 SPDK_ERRLOG("Could not allocate new connection.\n"); 961 spdk_sock_close(&sock); 962 return; 963 } 964 965 tqpair->sock = sock; 966 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0; 967 tqpair->port = port; 968 tqpair->qpair.transport = transport; 969 tqpair->qpair.trid = port->trid; 970 971 rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr, 972 sizeof(tqpair->target_addr), &tqpair->target_port, 973 tqpair->initiator_addr, sizeof(tqpair->initiator_addr), 974 &tqpair->initiator_port); 975 if (rc < 0) { 976 SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair); 977 nvmf_tcp_qpair_destroy(tqpair); 978 return; 979 } 980 981 spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair); 982 } 983 984 static uint32_t 985 nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port) 986 { 987 struct spdk_sock *sock; 988 uint32_t count = 0; 989 int i; 990 991 for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) { 992 sock = spdk_sock_accept(port->listen_sock); 993 if (sock == NULL) { 994 break; 995 } 996 count++; 997 nvmf_tcp_handle_connect(transport, port, sock); 998 } 999 1000 return count; 1001 } 1002 1003 static uint32_t 1004 nvmf_tcp_accept(struct spdk_nvmf_transport *transport) 1005 { 1006 struct spdk_nvmf_tcp_transport *ttransport; 1007 struct spdk_nvmf_tcp_port *port; 1008 uint32_t count = 0; 1009 1010 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1011 1012 TAILQ_FOREACH(port, &ttransport->ports, link) { 1013 count += nvmf_tcp_port_accept(transport, port); 1014 } 1015 1016 return count; 1017 } 1018 1019 static void 1020 nvmf_tcp_discover(struct spdk_nvmf_transport *transport, 1021 struct spdk_nvme_transport_id *trid, 1022 struct spdk_nvmf_discovery_log_page_entry *entry) 1023 { 1024 entry->trtype = SPDK_NVMF_TRTYPE_TCP; 1025 entry->adrfam = trid->adrfam; 1026 entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED; 1027 1028 spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); 1029 spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); 1030 1031 entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE; 1032 } 1033 1034 static struct spdk_nvmf_tcp_control_msg_list * 1035 nvmf_tcp_control_msg_list_create(uint16_t num_messages) 1036 { 1037 struct spdk_nvmf_tcp_control_msg_list *list; 1038 struct spdk_nvmf_tcp_control_msg *msg; 1039 uint16_t i; 1040 1041 list = calloc(1, sizeof(*list)); 1042 if (!list) { 1043 SPDK_ERRLOG("Failed to allocate memory for list structure\n"); 1044 return NULL; 1045 } 1046 1047 list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, 1048 NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 1049 if (!list->msg_buf) { 1050 SPDK_ERRLOG("Failed to allocate memory for control message buffers\n"); 1051 free(list); 1052 return NULL; 1053 } 1054 1055 STAILQ_INIT(&list->free_msgs); 1056 1057 for (i = 0; i < num_messages; i++) { 1058 msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i * 1059 SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); 1060 STAILQ_INSERT_TAIL(&list->free_msgs, msg, link); 1061 } 1062 1063 return list; 1064 } 1065 1066 static void 1067 nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list) 1068 { 1069 if (!list) { 1070 return; 1071 } 1072 1073 spdk_free(list->msg_buf); 1074 free(list); 1075 } 1076 1077 static struct spdk_nvmf_transport_poll_group * 1078 nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport) 1079 { 1080 struct spdk_nvmf_tcp_transport *ttransport; 1081 struct spdk_nvmf_tcp_poll_group *tgroup; 1082 1083 tgroup = calloc(1, sizeof(*tgroup)); 1084 if (!tgroup) { 1085 return NULL; 1086 } 1087 1088 tgroup->sock_group = spdk_sock_group_create(&tgroup->group); 1089 if (!tgroup->sock_group) { 1090 goto cleanup; 1091 } 1092 1093 TAILQ_INIT(&tgroup->qpairs); 1094 1095 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1096 1097 if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { 1098 SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). " 1099 "Creating control messages list\n", transport->opts.in_capsule_data_size, 1100 SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); 1101 tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(ttransport->tcp_opts.control_msg_num); 1102 if (!tgroup->control_msg_list) { 1103 goto cleanup; 1104 } 1105 } 1106 1107 return &tgroup->group; 1108 1109 cleanup: 1110 nvmf_tcp_poll_group_destroy(&tgroup->group); 1111 return NULL; 1112 } 1113 1114 static struct spdk_nvmf_transport_poll_group * 1115 nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) 1116 { 1117 struct spdk_nvmf_tcp_qpair *tqpair; 1118 struct spdk_sock_group *group = NULL; 1119 int rc; 1120 1121 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1122 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group); 1123 if (!rc && group != NULL) { 1124 return spdk_sock_group_get_ctx(group); 1125 } 1126 1127 return NULL; 1128 } 1129 1130 static void 1131 nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1132 { 1133 struct spdk_nvmf_tcp_poll_group *tgroup; 1134 1135 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 1136 spdk_sock_group_close(&tgroup->sock_group); 1137 if (tgroup->control_msg_list) { 1138 nvmf_tcp_control_msg_list_free(tgroup->control_msg_list); 1139 } 1140 1141 free(tgroup); 1142 } 1143 1144 static void 1145 nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 1146 enum nvme_tcp_pdu_recv_state state) 1147 { 1148 if (tqpair->recv_state == state) { 1149 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 1150 tqpair, state); 1151 return; 1152 } 1153 1154 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv state=%d\n", tqpair, state); 1155 tqpair->recv_state = state; 1156 1157 switch (state) { 1158 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1159 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1160 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1161 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: 1162 break; 1163 case NVME_TCP_PDU_RECV_STATE_ERROR: 1164 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1165 memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress)); 1166 break; 1167 default: 1168 SPDK_ERRLOG("The state(%d) is invalid\n", state); 1169 abort(); 1170 break; 1171 } 1172 } 1173 1174 static int 1175 nvmf_tcp_qpair_handle_timeout(void *ctx) 1176 { 1177 struct spdk_nvmf_tcp_qpair *tqpair = ctx; 1178 1179 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 1180 1181 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, 1182 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); 1183 1184 nvmf_tcp_qpair_disconnect(tqpair); 1185 return SPDK_POLLER_BUSY; 1186 } 1187 1188 static void 1189 nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) 1190 { 1191 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; 1192 1193 if (!tqpair->timeout_poller) { 1194 tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair, 1195 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); 1196 } 1197 } 1198 1199 static void 1200 nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1201 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1202 { 1203 struct nvme_tcp_pdu *rsp_pdu; 1204 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; 1205 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); 1206 uint32_t copy_len; 1207 1208 rsp_pdu = tqpair->mgmt_pdu; 1209 1210 c2h_term_req = &rsp_pdu->hdr.term_req; 1211 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; 1212 c2h_term_req->common.hlen = c2h_term_req_hdr_len; 1213 1214 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1215 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1216 DSET32(&c2h_term_req->fei, error_offset); 1217 } 1218 1219 copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE); 1220 1221 /* Copy the error info into the buffer */ 1222 memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len); 1223 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len); 1224 1225 /* Contain the header of the wrong received pdu */ 1226 c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len; 1227 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1228 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_c2h_term_req_complete, tqpair); 1229 } 1230 1231 static void 1232 nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1233 struct spdk_nvmf_tcp_qpair *tqpair, 1234 struct nvme_tcp_pdu *pdu) 1235 { 1236 struct spdk_nvmf_tcp_req *tcp_req; 1237 1238 assert(pdu->psh_valid_bytes == pdu->psh_len); 1239 assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD); 1240 1241 tcp_req = nvmf_tcp_req_get(tqpair); 1242 if (!tcp_req) { 1243 /* Directly return and make the allocation retry again */ 1244 if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0) { 1245 return; 1246 } 1247 1248 /* The host sent more commands than the maximum queue depth. */ 1249 SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair); 1250 nvmf_tcp_qpair_disconnect(tqpair); 1251 return; 1252 } 1253 1254 pdu->req = tcp_req; 1255 assert(tcp_req->state == TCP_REQUEST_STATE_NEW); 1256 nvmf_tcp_req_process(ttransport, tcp_req); 1257 } 1258 1259 static void 1260 nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1261 struct spdk_nvmf_tcp_qpair *tqpair, 1262 struct nvme_tcp_pdu *pdu) 1263 { 1264 struct spdk_nvmf_tcp_req *tcp_req; 1265 struct spdk_nvme_tcp_cmd *capsule_cmd; 1266 uint32_t error_offset = 0; 1267 enum spdk_nvme_tcp_term_req_fes fes; 1268 1269 capsule_cmd = &pdu->hdr.capsule_cmd; 1270 tcp_req = pdu->req; 1271 assert(tcp_req != NULL); 1272 if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) { 1273 SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n", 1274 SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo); 1275 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1276 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1277 goto err; 1278 } 1279 1280 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1281 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1282 nvmf_tcp_req_process(ttransport, tcp_req); 1283 1284 return; 1285 err: 1286 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1287 } 1288 1289 static int 1290 nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair, 1291 enum spdk_nvmf_tcp_req_state state, 1292 uint16_t cid, uint16_t tag, 1293 struct spdk_nvmf_tcp_req **req) 1294 { 1295 struct spdk_nvmf_tcp_req *tcp_req = NULL; 1296 1297 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[state], state_link) { 1298 if (tcp_req->req.cmd->nvme_cmd.cid != cid) { 1299 continue; 1300 } 1301 1302 if (tcp_req->ttag == tag) { 1303 *req = tcp_req; 1304 return 0; 1305 } 1306 1307 *req = NULL; 1308 return -1; 1309 } 1310 1311 /* Didn't find it, but not an error */ 1312 *req = NULL; 1313 return 0; 1314 } 1315 1316 static void 1317 nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1318 struct spdk_nvmf_tcp_qpair *tqpair, 1319 struct nvme_tcp_pdu *pdu) 1320 { 1321 struct spdk_nvmf_tcp_req *tcp_req; 1322 uint32_t error_offset = 0; 1323 enum spdk_nvme_tcp_term_req_fes fes = 0; 1324 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1325 int rc; 1326 1327 h2c_data = &pdu->hdr.h2c_data; 1328 1329 SPDK_DEBUGLOG(nvmf_tcp, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n", 1330 tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag); 1331 1332 rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 1333 h2c_data->cccid, h2c_data->ttag, &tcp_req); 1334 if (rc == 0 && tcp_req == NULL) { 1335 rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid, 1336 h2c_data->ttag, &tcp_req); 1337 } 1338 1339 if (!tcp_req) { 1340 SPDK_DEBUGLOG(nvmf_tcp, "tcp_req is not found for tqpair=%p\n", tqpair); 1341 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 1342 if (rc == 0) { 1343 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid); 1344 } else { 1345 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); 1346 } 1347 goto err; 1348 } 1349 1350 if (tcp_req->h2c_offset != h2c_data->datao) { 1351 SPDK_DEBUGLOG(nvmf_tcp, 1352 "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n", 1353 tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao); 1354 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1355 goto err; 1356 } 1357 1358 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { 1359 SPDK_DEBUGLOG(nvmf_tcp, 1360 "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) execeeds requested length=%u\n", 1361 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); 1362 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1363 goto err; 1364 } 1365 1366 pdu->req = tcp_req; 1367 1368 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { 1369 pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 1370 } 1371 1372 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 1373 h2c_data->datao, h2c_data->datal); 1374 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1375 return; 1376 1377 err: 1378 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1379 } 1380 1381 static void 1382 nvmf_tcp_pdu_cmd_complete(void *cb_arg) 1383 { 1384 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1385 nvmf_tcp_request_free(tcp_req); 1386 } 1387 1388 static void 1389 nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, 1390 struct spdk_nvmf_tcp_qpair *tqpair) 1391 { 1392 struct nvme_tcp_pdu *rsp_pdu; 1393 struct spdk_nvme_tcp_rsp *capsule_resp; 1394 1395 SPDK_DEBUGLOG(nvmf_tcp, "enter, tqpair=%p\n", tqpair); 1396 1397 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 1398 assert(rsp_pdu != NULL); 1399 1400 capsule_resp = &rsp_pdu->hdr.capsule_resp; 1401 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1402 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); 1403 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; 1404 if (tqpair->host_hdgst_enable) { 1405 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1406 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1407 } 1408 1409 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_cmd_complete, tcp_req); 1410 } 1411 1412 static void 1413 nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) 1414 { 1415 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1416 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, 1417 struct spdk_nvmf_tcp_qpair, qpair); 1418 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF( 1419 tcp_req->req.qpair->transport, struct spdk_nvmf_tcp_transport, transport); 1420 1421 assert(tqpair != NULL); 1422 if (ttransport->tcp_opts.c2h_success) { 1423 nvmf_tcp_request_free(tcp_req); 1424 } else { 1425 nvmf_tcp_req_pdu_fini(tcp_req); 1426 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 1427 } 1428 } 1429 1430 static void 1431 nvmf_tcp_r2t_complete(void *cb_arg) 1432 { 1433 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1434 struct spdk_nvmf_tcp_transport *ttransport; 1435 1436 nvmf_tcp_req_pdu_fini(tcp_req); 1437 1438 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 1439 struct spdk_nvmf_tcp_transport, transport); 1440 1441 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 1442 1443 if (tcp_req->h2c_offset == tcp_req->req.length) { 1444 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1445 nvmf_tcp_req_process(ttransport, tcp_req); 1446 } 1447 } 1448 1449 static void 1450 nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1451 struct spdk_nvmf_tcp_req *tcp_req) 1452 { 1453 struct nvme_tcp_pdu *rsp_pdu; 1454 struct spdk_nvme_tcp_r2t_hdr *r2t; 1455 1456 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 1457 assert(rsp_pdu != NULL); 1458 1459 r2t = &rsp_pdu->hdr.r2t; 1460 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; 1461 r2t->common.plen = r2t->common.hlen = sizeof(*r2t); 1462 1463 if (tqpair->host_hdgst_enable) { 1464 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1465 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1466 } 1467 1468 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; 1469 r2t->ttag = tcp_req->ttag; 1470 r2t->r2to = tcp_req->h2c_offset; 1471 r2t->r2tl = tcp_req->req.length; 1472 1473 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 1474 1475 SPDK_DEBUGLOG(nvmf_tcp, 1476 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", 1477 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); 1478 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_r2t_complete, tcp_req); 1479 } 1480 1481 static void 1482 nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1483 struct spdk_nvmf_tcp_qpair *tqpair, 1484 struct nvme_tcp_pdu *pdu) 1485 { 1486 struct spdk_nvmf_tcp_req *tcp_req; 1487 1488 tcp_req = pdu->req; 1489 assert(tcp_req != NULL); 1490 1491 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 1492 1493 tcp_req->h2c_offset += pdu->data_len; 1494 1495 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1496 1497 /* Wait for all of the data to arrive AND for the initial R2T PDU send to be 1498 * acknowledged before moving on. */ 1499 if (tcp_req->h2c_offset == tcp_req->req.length && 1500 tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) { 1501 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1502 nvmf_tcp_req_process(ttransport, tcp_req); 1503 } 1504 } 1505 1506 static void 1507 nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) 1508 { 1509 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, 1510 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); 1511 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1512 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1513 SPDK_DEBUGLOG(nvmf_tcp, "The offset from the start of the PDU header is %u\n", 1514 DGET32(h2c_term_req->fei)); 1515 } 1516 } 1517 1518 static void 1519 nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1520 struct nvme_tcp_pdu *pdu) 1521 { 1522 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1523 uint32_t error_offset = 0; 1524 enum spdk_nvme_tcp_term_req_fes fes; 1525 1526 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1527 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for h2c_term_req pdu=%p\n", pdu); 1528 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1529 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1530 goto end; 1531 } 1532 1533 /* set the data buffer */ 1534 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, 1535 h2c_term_req->common.plen - h2c_term_req->common.hlen); 1536 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1537 return; 1538 end: 1539 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1540 } 1541 1542 static void 1543 nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1544 struct nvme_tcp_pdu *pdu) 1545 { 1546 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1547 1548 nvmf_tcp_h2c_term_req_dump(h2c_term_req); 1549 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1550 } 1551 1552 static void 1553 nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1554 struct spdk_nvmf_tcp_transport *ttransport) 1555 { 1556 int rc = 0; 1557 struct nvme_tcp_pdu *pdu; 1558 uint32_t crc32c, error_offset = 0; 1559 enum spdk_nvme_tcp_term_req_fes fes; 1560 1561 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1562 pdu = &tqpair->pdu_in_progress; 1563 1564 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 1565 /* check data digest if need */ 1566 if (pdu->ddgst_enable) { 1567 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1568 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1569 if (rc == 0) { 1570 SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1571 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1572 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1573 return; 1574 1575 } 1576 } 1577 1578 switch (pdu->hdr.common.pdu_type) { 1579 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1580 nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu); 1581 break; 1582 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1583 nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu); 1584 break; 1585 1586 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1587 nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu); 1588 break; 1589 1590 default: 1591 /* The code should not go to here */ 1592 SPDK_ERRLOG("The code should not go to here\n"); 1593 break; 1594 } 1595 } 1596 1597 static void 1598 nvmf_tcp_send_icresp_complete(void *cb_arg) 1599 { 1600 struct spdk_nvmf_tcp_qpair *tqpair = cb_arg; 1601 1602 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 1603 } 1604 1605 static void 1606 nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport, 1607 struct spdk_nvmf_tcp_qpair *tqpair, 1608 struct nvme_tcp_pdu *pdu) 1609 { 1610 struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; 1611 struct nvme_tcp_pdu *rsp_pdu; 1612 struct spdk_nvme_tcp_ic_resp *ic_resp; 1613 uint32_t error_offset = 0; 1614 enum spdk_nvme_tcp_term_req_fes fes; 1615 1616 /* Only PFV 0 is defined currently */ 1617 if (ic_req->pfv != 0) { 1618 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); 1619 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1620 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv); 1621 goto end; 1622 } 1623 1624 /* MAXR2T is 0's based */ 1625 SPDK_DEBUGLOG(nvmf_tcp, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); 1626 1627 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; 1628 if (!tqpair->host_hdgst_enable) { 1629 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 1630 } 1631 1632 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; 1633 if (!tqpair->host_ddgst_enable) { 1634 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 1635 } 1636 1637 tqpair->recv_buf_size = spdk_max(tqpair->recv_buf_size, MIN_SOCK_PIPE_SIZE); 1638 /* Now that we know whether digests are enabled, properly size the receive buffer */ 1639 if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) { 1640 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1641 tqpair, 1642 tqpair->recv_buf_size); 1643 /* Not fatal. */ 1644 } 1645 1646 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); 1647 SPDK_DEBUGLOG(nvmf_tcp, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); 1648 1649 rsp_pdu = tqpair->mgmt_pdu; 1650 1651 ic_resp = &rsp_pdu->hdr.ic_resp; 1652 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; 1653 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); 1654 ic_resp->pfv = 0; 1655 ic_resp->cpda = tqpair->cpda; 1656 ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size; 1657 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; 1658 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0; 1659 1660 SPDK_DEBUGLOG(nvmf_tcp, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); 1661 SPDK_DEBUGLOG(nvmf_tcp, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); 1662 1663 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1664 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_icresp_complete, tqpair); 1665 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1666 return; 1667 end: 1668 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1669 } 1670 1671 static void 1672 nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1673 struct spdk_nvmf_tcp_transport *ttransport) 1674 { 1675 struct nvme_tcp_pdu *pdu; 1676 int rc; 1677 uint32_t crc32c, error_offset = 0; 1678 enum spdk_nvme_tcp_term_req_fes fes; 1679 1680 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1681 pdu = &tqpair->pdu_in_progress; 1682 1683 SPDK_DEBUGLOG(nvmf_tcp, "pdu type of tqpair(%p) is %d\n", tqpair, 1684 pdu->hdr.common.pdu_type); 1685 /* check header digest if needed */ 1686 if (pdu->has_hdgst) { 1687 SPDK_DEBUGLOG(nvmf_tcp, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); 1688 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1689 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1690 if (rc == 0) { 1691 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1692 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1693 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1694 return; 1695 1696 } 1697 } 1698 1699 switch (pdu->hdr.common.pdu_type) { 1700 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: 1701 nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); 1702 break; 1703 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1704 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ); 1705 break; 1706 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1707 nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); 1708 break; 1709 1710 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1711 nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); 1712 break; 1713 1714 default: 1715 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type); 1716 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1717 error_offset = 1; 1718 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1719 break; 1720 } 1721 } 1722 1723 static void 1724 nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) 1725 { 1726 struct nvme_tcp_pdu *pdu; 1727 uint32_t error_offset = 0; 1728 enum spdk_nvme_tcp_term_req_fes fes; 1729 uint8_t expected_hlen, pdo; 1730 bool plen_error = false, pdo_error = false; 1731 1732 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1733 pdu = &tqpair->pdu_in_progress; 1734 1735 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { 1736 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 1737 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); 1738 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1739 goto err; 1740 } 1741 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); 1742 if (pdu->hdr.common.plen != expected_hlen) { 1743 plen_error = true; 1744 } 1745 } else { 1746 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 1747 SPDK_ERRLOG("The TCP/IP connection is not negotitated\n"); 1748 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1749 goto err; 1750 } 1751 1752 switch (pdu->hdr.common.pdu_type) { 1753 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1754 expected_hlen = sizeof(struct spdk_nvme_tcp_cmd); 1755 pdo = pdu->hdr.common.pdo; 1756 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { 1757 pdo_error = true; 1758 break; 1759 } 1760 1761 if (pdu->hdr.common.plen < expected_hlen) { 1762 plen_error = true; 1763 } 1764 break; 1765 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1766 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); 1767 pdo = pdu->hdr.common.pdo; 1768 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) { 1769 pdo_error = true; 1770 break; 1771 } 1772 if (pdu->hdr.common.plen < expected_hlen) { 1773 plen_error = true; 1774 } 1775 break; 1776 1777 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1778 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 1779 if ((pdu->hdr.common.plen <= expected_hlen) || 1780 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 1781 plen_error = true; 1782 } 1783 break; 1784 1785 default: 1786 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); 1787 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1788 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 1789 goto err; 1790 } 1791 } 1792 1793 if (pdu->hdr.common.hlen != expected_hlen) { 1794 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", 1795 pdu->hdr.common.pdu_type, 1796 expected_hlen, pdu->hdr.common.hlen, tqpair); 1797 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1798 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 1799 goto err; 1800 } else if (pdo_error) { 1801 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1802 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1803 } else if (plen_error) { 1804 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1805 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 1806 goto err; 1807 } else { 1808 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1809 nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable); 1810 return; 1811 } 1812 err: 1813 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1814 } 1815 1816 static int 1817 nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset, 1818 int read_len) 1819 { 1820 int rc; 1821 1822 rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 1823 read_offset, read_len, pdu->dif_ctx); 1824 if (rc != 0) { 1825 SPDK_ERRLOG("DIF generate failed\n"); 1826 } 1827 1828 return rc; 1829 } 1830 1831 static int 1832 nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) 1833 { 1834 int rc = 0; 1835 struct nvme_tcp_pdu *pdu; 1836 enum nvme_tcp_pdu_recv_state prev_state; 1837 uint32_t data_len; 1838 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 1839 struct spdk_nvmf_tcp_transport, transport); 1840 1841 /* The loop here is to allow for several back-to-back state changes. */ 1842 do { 1843 prev_state = tqpair->recv_state; 1844 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); 1845 1846 pdu = &tqpair->pdu_in_progress; 1847 switch (tqpair->recv_state) { 1848 /* Wait for the common header */ 1849 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1850 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1851 if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 1852 return rc; 1853 } 1854 1855 rc = nvme_tcp_read_data(tqpair->sock, 1856 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1857 (void *)&pdu->hdr.common + pdu->ch_valid_bytes); 1858 if (rc < 0) { 1859 SPDK_DEBUGLOG(nvmf_tcp, "will disconnect tqpair=%p\n", tqpair); 1860 return NVME_TCP_PDU_FATAL; 1861 } else if (rc > 0) { 1862 pdu->ch_valid_bytes += rc; 1863 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0); 1864 if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) { 1865 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1866 } 1867 } 1868 1869 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1870 return NVME_TCP_PDU_IN_PROGRESS; 1871 } 1872 1873 /* The command header of this PDU has now been read from the socket. */ 1874 nvmf_tcp_pdu_ch_handle(tqpair); 1875 break; 1876 /* Wait for the pdu specific header */ 1877 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1878 rc = nvme_tcp_read_data(tqpair->sock, 1879 pdu->psh_len - pdu->psh_valid_bytes, 1880 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1881 if (rc < 0) { 1882 return NVME_TCP_PDU_FATAL; 1883 } else if (rc > 0) { 1884 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 1885 0, rc, 0, 0); 1886 pdu->psh_valid_bytes += rc; 1887 } 1888 1889 if (pdu->psh_valid_bytes < pdu->psh_len) { 1890 return NVME_TCP_PDU_IN_PROGRESS; 1891 } 1892 1893 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1894 nvmf_tcp_pdu_psh_handle(tqpair, ttransport); 1895 break; 1896 /* Wait for the req slot */ 1897 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: 1898 nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); 1899 break; 1900 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1901 /* check whether the data is valid, if not we just return */ 1902 if (!pdu->data_len) { 1903 return NVME_TCP_PDU_IN_PROGRESS; 1904 } 1905 1906 data_len = pdu->data_len; 1907 /* data digest */ 1908 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && 1909 tqpair->host_ddgst_enable)) { 1910 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1911 pdu->ddgst_enable = true; 1912 } 1913 1914 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1915 if (rc < 0) { 1916 return NVME_TCP_PDU_FATAL; 1917 } 1918 pdu->readv_offset += rc; 1919 1920 if (spdk_unlikely(pdu->dif_ctx != NULL)) { 1921 rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc); 1922 if (rc != 0) { 1923 return NVME_TCP_PDU_FATAL; 1924 } 1925 } 1926 1927 if (pdu->readv_offset < data_len) { 1928 return NVME_TCP_PDU_IN_PROGRESS; 1929 } 1930 1931 /* All of this PDU has now been read from the socket. */ 1932 nvmf_tcp_pdu_payload_handle(tqpair, ttransport); 1933 break; 1934 case NVME_TCP_PDU_RECV_STATE_ERROR: 1935 if (!spdk_sock_is_connected(tqpair->sock)) { 1936 return NVME_TCP_PDU_FATAL; 1937 } 1938 break; 1939 default: 1940 assert(0); 1941 SPDK_ERRLOG("code should not come to here"); 1942 break; 1943 } 1944 } while (tqpair->recv_state != prev_state); 1945 1946 return rc; 1947 } 1948 1949 static inline void * 1950 nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list) 1951 { 1952 struct spdk_nvmf_tcp_control_msg *msg; 1953 1954 assert(list); 1955 1956 msg = STAILQ_FIRST(&list->free_msgs); 1957 if (!msg) { 1958 SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n"); 1959 return NULL; 1960 } 1961 STAILQ_REMOVE_HEAD(&list->free_msgs, link); 1962 return msg; 1963 } 1964 1965 static inline void 1966 nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg) 1967 { 1968 struct spdk_nvmf_tcp_control_msg *msg = _msg; 1969 1970 assert(list); 1971 STAILQ_INSERT_HEAD(&list->free_msgs, msg, link); 1972 } 1973 1974 static int 1975 nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, 1976 struct spdk_nvmf_transport *transport, 1977 struct spdk_nvmf_transport_poll_group *group) 1978 { 1979 struct spdk_nvmf_request *req = &tcp_req->req; 1980 struct spdk_nvme_cmd *cmd; 1981 struct spdk_nvme_cpl *rsp; 1982 struct spdk_nvme_sgl_descriptor *sgl; 1983 struct spdk_nvmf_tcp_poll_group *tgroup; 1984 uint32_t length; 1985 1986 cmd = &req->cmd->nvme_cmd; 1987 rsp = &req->rsp->nvme_cpl; 1988 sgl = &cmd->dptr.sgl1; 1989 1990 length = sgl->unkeyed.length; 1991 1992 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && 1993 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { 1994 if (length > transport->opts.max_io_size) { 1995 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 1996 length, transport->opts.max_io_size); 1997 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 1998 return -1; 1999 } 2000 2001 /* fill request length and populate iovs */ 2002 req->length = length; 2003 2004 SPDK_DEBUGLOG(nvmf_tcp, "Data requested length= 0x%x\n", length); 2005 2006 if (spdk_unlikely(req->dif.dif_insert_or_strip)) { 2007 req->dif.orig_length = length; 2008 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2009 req->dif.elba_length = length; 2010 } 2011 2012 if (spdk_nvmf_request_get_buffers(req, group, transport, length)) { 2013 /* No available buffers. Queue this request up. */ 2014 SPDK_DEBUGLOG(nvmf_tcp, "No available large data buffers. Queueing request %p\n", 2015 tcp_req); 2016 return 0; 2017 } 2018 2019 /* backward compatible */ 2020 req->data = req->iov[0].iov_base; 2021 2022 SPDK_DEBUGLOG(nvmf_tcp, "Request %p took %d buffer/s from central pool, and data=%p\n", 2023 tcp_req, req->iovcnt, req->data); 2024 2025 return 0; 2026 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 2027 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 2028 uint64_t offset = sgl->address; 2029 uint32_t max_len = transport->opts.in_capsule_data_size; 2030 assert(tcp_req->has_incapsule_data); 2031 2032 SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 2033 offset, length); 2034 2035 if (offset > max_len) { 2036 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", 2037 offset, max_len); 2038 rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; 2039 return -1; 2040 } 2041 max_len -= (uint32_t)offset; 2042 2043 if (spdk_unlikely(length > max_len)) { 2044 /* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */ 2045 if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE && 2046 (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) { 2047 2048 /* Get a buffer from dedicated list */ 2049 SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n"); 2050 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2051 assert(tgroup->control_msg_list); 2052 req->data = nvmf_tcp_control_msg_get(tgroup->control_msg_list); 2053 if (!req->data) { 2054 /* No available buffers. Queue this request up. */ 2055 SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req); 2056 return 0; 2057 } 2058 } else { 2059 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 2060 length, max_len); 2061 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 2062 return -1; 2063 } 2064 } else { 2065 req->data = tcp_req->buf; 2066 } 2067 2068 req->length = length; 2069 req->data_from_pool = false; 2070 2071 if (spdk_unlikely(req->dif.dif_insert_or_strip)) { 2072 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2073 req->dif.elba_length = length; 2074 } 2075 2076 req->iov[0].iov_base = req->data; 2077 req->iov[0].iov_len = length; 2078 req->iovcnt = 1; 2079 2080 return 0; 2081 } 2082 2083 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 2084 sgl->generic.type, sgl->generic.subtype); 2085 rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; 2086 return -1; 2087 } 2088 2089 static inline enum spdk_nvme_media_error_status_code 2090 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) { 2091 enum spdk_nvme_media_error_status_code result; 2092 2093 switch (err_type) 2094 { 2095 case SPDK_DIF_REFTAG_ERROR: 2096 result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR; 2097 break; 2098 case SPDK_DIF_APPTAG_ERROR: 2099 result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR; 2100 break; 2101 case SPDK_DIF_GUARD_ERROR: 2102 result = SPDK_NVME_SC_GUARD_CHECK_ERROR; 2103 break; 2104 default: 2105 SPDK_UNREACHABLE(); 2106 break; 2107 } 2108 2109 return result; 2110 } 2111 2112 static void 2113 nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2114 struct spdk_nvmf_tcp_req *tcp_req) 2115 { 2116 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF( 2117 tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 2118 struct nvme_tcp_pdu *rsp_pdu; 2119 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 2120 uint32_t plen, pdo, alignment; 2121 int rc; 2122 2123 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2124 2125 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 2126 assert(rsp_pdu != NULL); 2127 2128 c2h_data = &rsp_pdu->hdr.c2h_data; 2129 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; 2130 plen = c2h_data->common.hlen = sizeof(*c2h_data); 2131 2132 if (tqpair->host_hdgst_enable) { 2133 plen += SPDK_NVME_TCP_DIGEST_LEN; 2134 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2135 } 2136 2137 /* set the psh */ 2138 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2139 c2h_data->datal = tcp_req->req.length; 2140 c2h_data->datao = 0; 2141 2142 /* set the padding */ 2143 rsp_pdu->padding_len = 0; 2144 pdo = plen; 2145 if (tqpair->cpda) { 2146 alignment = (tqpair->cpda + 1) << 2; 2147 if (alignment > plen) { 2148 rsp_pdu->padding_len = alignment - plen; 2149 pdo = plen = alignment; 2150 } 2151 } 2152 2153 c2h_data->common.pdo = pdo; 2154 plen += c2h_data->datal; 2155 if (tqpair->host_ddgst_enable) { 2156 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 2157 plen += SPDK_NVME_TCP_DIGEST_LEN; 2158 } 2159 2160 c2h_data->common.plen = plen; 2161 2162 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { 2163 rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 2164 } 2165 2166 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2167 c2h_data->datao, c2h_data->datal); 2168 2169 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { 2170 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; 2171 struct spdk_dif_error err_blk = {}; 2172 2173 rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt, 2174 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk); 2175 if (rc != 0) { 2176 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2177 err_blk.err_type, err_blk.err_offset); 2178 rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR; 2179 rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type); 2180 nvmf_tcp_req_pdu_fini(tcp_req); 2181 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2182 return; 2183 } 2184 } 2185 2186 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 2187 if (ttransport->tcp_opts.c2h_success) { 2188 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 2189 } 2190 2191 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_c2h_data_complete, tcp_req); 2192 } 2193 2194 static int 2195 request_transfer_out(struct spdk_nvmf_request *req) 2196 { 2197 struct spdk_nvmf_tcp_req *tcp_req; 2198 struct spdk_nvmf_qpair *qpair; 2199 struct spdk_nvmf_tcp_qpair *tqpair; 2200 struct spdk_nvme_cpl *rsp; 2201 2202 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2203 2204 qpair = req->qpair; 2205 rsp = &req->rsp->nvme_cpl; 2206 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2207 2208 /* Advance our sq_head pointer */ 2209 if (qpair->sq_head == qpair->sq_head_max) { 2210 qpair->sq_head = 0; 2211 } else { 2212 qpair->sq_head++; 2213 } 2214 rsp->sqhd = qpair->sq_head; 2215 2216 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2217 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 2218 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 2219 nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2220 } else { 2221 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2222 } 2223 2224 return 0; 2225 } 2226 2227 static void 2228 nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair, 2229 struct spdk_nvmf_tcp_req *tcp_req) 2230 { 2231 struct nvme_tcp_pdu *pdu; 2232 uint32_t plen = 0; 2233 2234 pdu = &tqpair->pdu_in_progress; 2235 plen = pdu->hdr.common.hlen; 2236 2237 if (tqpair->host_hdgst_enable) { 2238 plen += SPDK_NVME_TCP_DIGEST_LEN; 2239 } 2240 2241 if (pdu->hdr.common.plen != plen) { 2242 tcp_req->has_incapsule_data = true; 2243 } 2244 } 2245 2246 static bool 2247 nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 2248 struct spdk_nvmf_tcp_req *tcp_req) 2249 { 2250 struct spdk_nvmf_tcp_qpair *tqpair; 2251 int rc; 2252 enum spdk_nvmf_tcp_req_state prev_state; 2253 bool progress = false; 2254 struct spdk_nvmf_transport *transport = &ttransport->transport; 2255 struct spdk_nvmf_transport_poll_group *group; 2256 struct spdk_nvmf_tcp_poll_group *tgroup; 2257 2258 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2259 group = &tqpair->group->group; 2260 assert(tcp_req->state != TCP_REQUEST_STATE_FREE); 2261 2262 /* If the qpair is not active, we need to abort the outstanding requests. */ 2263 if (tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { 2264 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { 2265 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2266 } 2267 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 2268 } 2269 2270 /* The loop here is to allow for several back-to-back state changes. */ 2271 do { 2272 prev_state = tcp_req->state; 2273 2274 SPDK_DEBUGLOG(nvmf_tcp, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, 2275 tqpair); 2276 2277 switch (tcp_req->state) { 2278 case TCP_REQUEST_STATE_FREE: 2279 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW 2280 * to escape this state. */ 2281 break; 2282 case TCP_REQUEST_STATE_NEW: 2283 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0); 2284 2285 /* copy the cmd from the receive pdu */ 2286 tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe; 2287 2288 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) { 2289 tcp_req->req.dif.dif_insert_or_strip = true; 2290 tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx; 2291 } 2292 2293 /* The next state transition depends on the data transfer needs of this request. */ 2294 tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req); 2295 2296 if (spdk_unlikely(tcp_req->req.xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) { 2297 tcp_req->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2298 tcp_req->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SC_INVALID_OPCODE; 2299 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2300 SPDK_DEBUGLOG(nvmf_tcp, "Request %p: invalid xfer type (BIDIRECTIONAL)\n", tcp_req); 2301 break; 2302 } 2303 2304 /* If no data to transfer, ready to execute. */ 2305 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { 2306 /* Reset the tqpair receving pdu state */ 2307 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2308 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2309 break; 2310 } 2311 2312 nvmf_tcp_set_incapsule_data(tqpair, tcp_req); 2313 2314 if (!tcp_req->has_incapsule_data) { 2315 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2316 } 2317 2318 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); 2319 STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link); 2320 break; 2321 case TCP_REQUEST_STATE_NEED_BUFFER: 2322 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0); 2323 2324 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); 2325 2326 if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) { 2327 SPDK_DEBUGLOG(nvmf_tcp, 2328 "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n", 2329 tcp_req, tqpair); 2330 /* This request needs to wait in line to obtain a buffer */ 2331 break; 2332 } 2333 2334 /* Try to get a data buffer */ 2335 rc = nvmf_tcp_req_parse_sgl(tcp_req, transport, group); 2336 if (rc < 0) { 2337 STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link); 2338 /* Reset the tqpair receving pdu state */ 2339 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 2340 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2341 break; 2342 } 2343 2344 if (!tcp_req->req.data) { 2345 SPDK_DEBUGLOG(nvmf_tcp, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)", 2346 tcp_req, tqpair); 2347 /* No buffers available. */ 2348 break; 2349 } 2350 2351 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2352 2353 /* If data is transferring from host to controller, we need to do a transfer from the host. */ 2354 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2355 if (tcp_req->req.data_from_pool) { 2356 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2357 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2358 } else { 2359 struct nvme_tcp_pdu *pdu; 2360 2361 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2362 2363 pdu = &tqpair->pdu_in_progress; 2364 SPDK_DEBUGLOG(nvmf_tcp, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, 2365 tqpair); 2366 /* No need to send r2t, contained in the capsuled data */ 2367 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2368 0, tcp_req->req.length); 2369 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2370 } 2371 break; 2372 } 2373 2374 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2375 break; 2376 case TCP_REQUEST_STATE_AWAITING_R2T_ACK: 2377 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0); 2378 /* The R2T completion or the h2c data incoming will kick it out of this state. */ 2379 break; 2380 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 2381 2382 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, 2383 (uintptr_t)tcp_req, 0); 2384 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE 2385 * to escape this state. */ 2386 break; 2387 case TCP_REQUEST_STATE_READY_TO_EXECUTE: 2388 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0); 2389 2390 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { 2391 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 2392 tcp_req->req.length = tcp_req->req.dif.elba_length; 2393 } 2394 2395 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING); 2396 spdk_nvmf_request_exec(&tcp_req->req); 2397 break; 2398 case TCP_REQUEST_STATE_EXECUTING: 2399 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0); 2400 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 2401 * to escape this state. */ 2402 break; 2403 case TCP_REQUEST_STATE_EXECUTED: 2404 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0); 2405 2406 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) { 2407 tcp_req->req.length = tcp_req->req.dif.orig_length; 2408 } 2409 2410 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2411 break; 2412 case TCP_REQUEST_STATE_READY_TO_COMPLETE: 2413 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0); 2414 rc = request_transfer_out(&tcp_req->req); 2415 assert(rc == 0); /* No good way to handle this currently */ 2416 break; 2417 case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: 2418 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, 2419 (uintptr_t)tcp_req, 2420 0); 2421 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 2422 * to escape this state. */ 2423 break; 2424 case TCP_REQUEST_STATE_COMPLETED: 2425 spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0); 2426 if (tcp_req->req.data_from_pool) { 2427 spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport); 2428 } else if (spdk_unlikely(tcp_req->has_incapsule_data && (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC || 2429 tqpair->qpair.qid == 0) && tcp_req->req.length > transport->opts.in_capsule_data_size)) { 2430 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2431 assert(tgroup->control_msg_list); 2432 SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n"); 2433 nvmf_tcp_control_msg_put(tgroup->control_msg_list, tcp_req->req.data); 2434 } 2435 tcp_req->req.length = 0; 2436 tcp_req->req.iovcnt = 0; 2437 tcp_req->req.data = NULL; 2438 2439 nvmf_tcp_req_pdu_fini(tcp_req); 2440 2441 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE); 2442 break; 2443 case TCP_REQUEST_NUM_STATES: 2444 default: 2445 assert(0); 2446 break; 2447 } 2448 2449 if (tcp_req->state != prev_state) { 2450 progress = true; 2451 } 2452 } while (tcp_req->state != prev_state); 2453 2454 return progress; 2455 } 2456 2457 static void 2458 nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) 2459 { 2460 struct spdk_nvmf_tcp_qpair *tqpair = arg; 2461 int rc; 2462 2463 assert(tqpair != NULL); 2464 rc = nvmf_tcp_sock_process(tqpair); 2465 2466 /* If there was a new socket error, disconnect */ 2467 if (rc < 0) { 2468 nvmf_tcp_qpair_disconnect(tqpair); 2469 } 2470 } 2471 2472 static int 2473 nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2474 struct spdk_nvmf_qpair *qpair) 2475 { 2476 struct spdk_nvmf_tcp_poll_group *tgroup; 2477 struct spdk_nvmf_tcp_qpair *tqpair; 2478 int rc; 2479 2480 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2481 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2482 2483 rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock, 2484 nvmf_tcp_sock_cb, tqpair); 2485 if (rc != 0) { 2486 SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n", 2487 spdk_strerror(errno), errno); 2488 return -1; 2489 } 2490 2491 rc = nvmf_tcp_qpair_sock_init(tqpair); 2492 if (rc != 0) { 2493 SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair); 2494 return -1; 2495 } 2496 2497 rc = nvmf_tcp_qpair_init(&tqpair->qpair); 2498 if (rc < 0) { 2499 SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair); 2500 return -1; 2501 } 2502 2503 rc = nvmf_tcp_qpair_init_mem_resource(tqpair); 2504 if (rc < 0) { 2505 SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair); 2506 return -1; 2507 } 2508 2509 tqpair->group = tgroup; 2510 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 2511 TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link); 2512 2513 return 0; 2514 } 2515 2516 static int 2517 nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 2518 struct spdk_nvmf_qpair *qpair) 2519 { 2520 struct spdk_nvmf_tcp_poll_group *tgroup; 2521 struct spdk_nvmf_tcp_qpair *tqpair; 2522 int rc; 2523 2524 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2525 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2526 2527 assert(tqpair->group == tgroup); 2528 2529 SPDK_DEBUGLOG(nvmf_tcp, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup); 2530 TAILQ_REMOVE(&tgroup->qpairs, tqpair, link); 2531 2532 rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock); 2533 if (rc != 0) { 2534 SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n", 2535 spdk_strerror(errno), errno); 2536 } 2537 2538 return rc; 2539 } 2540 2541 static int 2542 nvmf_tcp_req_complete(struct spdk_nvmf_request *req) 2543 { 2544 struct spdk_nvmf_tcp_transport *ttransport; 2545 struct spdk_nvmf_tcp_req *tcp_req; 2546 2547 ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport); 2548 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2549 2550 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 2551 nvmf_tcp_req_process(ttransport, tcp_req); 2552 2553 return 0; 2554 } 2555 2556 static void 2557 nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair, 2558 spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg) 2559 { 2560 struct spdk_nvmf_tcp_qpair *tqpair; 2561 2562 SPDK_DEBUGLOG(nvmf_tcp, "Qpair: %p\n", qpair); 2563 2564 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2565 tqpair->state = NVME_TCP_QPAIR_STATE_EXITED; 2566 nvmf_tcp_qpair_destroy(tqpair); 2567 2568 if (cb_fn) { 2569 cb_fn(cb_arg); 2570 } 2571 } 2572 2573 static int 2574 nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 2575 { 2576 struct spdk_nvmf_tcp_poll_group *tgroup; 2577 int rc; 2578 struct spdk_nvmf_request *req, *req_tmp; 2579 struct spdk_nvmf_tcp_req *tcp_req; 2580 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport, 2581 struct spdk_nvmf_tcp_transport, transport); 2582 2583 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2584 2585 if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs))) { 2586 return 0; 2587 } 2588 2589 STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) { 2590 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2591 if (nvmf_tcp_req_process(ttransport, tcp_req) == false) { 2592 break; 2593 } 2594 } 2595 2596 rc = spdk_sock_group_poll(tgroup->sock_group); 2597 if (rc < 0) { 2598 SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group); 2599 } 2600 2601 return rc; 2602 } 2603 2604 static int 2605 nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair, 2606 struct spdk_nvme_transport_id *trid, bool peer) 2607 { 2608 struct spdk_nvmf_tcp_qpair *tqpair; 2609 uint16_t port; 2610 2611 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2612 spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP); 2613 2614 if (peer) { 2615 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr); 2616 port = tqpair->initiator_port; 2617 } else { 2618 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr); 2619 port = tqpair->target_port; 2620 } 2621 2622 if (spdk_sock_is_ipv4(tqpair->sock)) { 2623 trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; 2624 } else if (spdk_sock_is_ipv6(tqpair->sock)) { 2625 trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; 2626 } else { 2627 return -1; 2628 } 2629 2630 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port); 2631 return 0; 2632 } 2633 2634 static int 2635 nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 2636 struct spdk_nvme_transport_id *trid) 2637 { 2638 return nvmf_tcp_qpair_get_trid(qpair, trid, 0); 2639 } 2640 2641 static int 2642 nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 2643 struct spdk_nvme_transport_id *trid) 2644 { 2645 return nvmf_tcp_qpair_get_trid(qpair, trid, 1); 2646 } 2647 2648 static int 2649 nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 2650 struct spdk_nvme_transport_id *trid) 2651 { 2652 return nvmf_tcp_qpair_get_trid(qpair, trid, 0); 2653 } 2654 2655 static void 2656 nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req, 2657 struct spdk_nvmf_tcp_req *tcp_req_to_abort) 2658 { 2659 tcp_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2660 tcp_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; 2661 2662 nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2663 2664 req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */ 2665 } 2666 2667 static int 2668 _nvmf_tcp_qpair_abort_request(void *ctx) 2669 { 2670 struct spdk_nvmf_request *req = ctx; 2671 struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort, 2672 struct spdk_nvmf_tcp_req, req); 2673 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair, 2674 struct spdk_nvmf_tcp_qpair, qpair); 2675 int rc; 2676 2677 spdk_poller_unregister(&req->poller); 2678 2679 switch (tcp_req_to_abort->state) { 2680 case TCP_REQUEST_STATE_EXECUTING: 2681 rc = nvmf_ctrlr_abort_request(req); 2682 if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) { 2683 return SPDK_POLLER_BUSY; 2684 } 2685 break; 2686 2687 case TCP_REQUEST_STATE_NEED_BUFFER: 2688 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, 2689 &tcp_req_to_abort->req, spdk_nvmf_request, buf_link); 2690 2691 nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort); 2692 break; 2693 2694 case TCP_REQUEST_STATE_AWAITING_R2T_ACK: 2695 nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort); 2696 break; 2697 2698 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 2699 if (spdk_get_ticks() < req->timeout_tsc) { 2700 req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0); 2701 return SPDK_POLLER_BUSY; 2702 } 2703 break; 2704 2705 default: 2706 break; 2707 } 2708 2709 spdk_nvmf_request_complete(req); 2710 return SPDK_POLLER_BUSY; 2711 } 2712 2713 static void 2714 nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair, 2715 struct spdk_nvmf_request *req) 2716 { 2717 struct spdk_nvmf_tcp_qpair *tqpair; 2718 struct spdk_nvmf_tcp_transport *ttransport; 2719 struct spdk_nvmf_transport *transport; 2720 uint16_t cid; 2721 uint32_t i; 2722 struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL; 2723 2724 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 2725 ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport); 2726 transport = &ttransport->transport; 2727 2728 cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid; 2729 2730 for (i = 0; i < tqpair->resource_count; i++) { 2731 tcp_req_to_abort = &tqpair->reqs[i]; 2732 2733 if (tcp_req_to_abort->state != TCP_REQUEST_STATE_FREE && 2734 tcp_req_to_abort->req.cmd->nvme_cmd.cid == cid) { 2735 break; 2736 } 2737 } 2738 2739 if (tcp_req_to_abort == NULL) { 2740 spdk_nvmf_request_complete(req); 2741 return; 2742 } 2743 2744 req->req_to_abort = &tcp_req_to_abort->req; 2745 req->timeout_tsc = spdk_get_ticks() + 2746 transport->opts.abort_timeout_sec * spdk_get_ticks_hz(); 2747 req->poller = NULL; 2748 2749 _nvmf_tcp_qpair_abort_request(req); 2750 } 2751 2752 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128 2753 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128 2754 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128 2755 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 2756 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072 2757 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072 2758 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511 2759 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32 2760 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false 2761 #define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1 2762 2763 static void 2764 nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts) 2765 { 2766 opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH; 2767 opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR; 2768 opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE; 2769 opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE; 2770 opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE; 2771 opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH; 2772 opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS; 2773 opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE; 2774 opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP; 2775 opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC; 2776 opts->transport_specific = NULL; 2777 } 2778 2779 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = { 2780 .name = "TCP", 2781 .type = SPDK_NVME_TRANSPORT_TCP, 2782 .opts_init = nvmf_tcp_opts_init, 2783 .create = nvmf_tcp_create, 2784 .dump_opts = nvmf_tcp_dump_opts, 2785 .destroy = nvmf_tcp_destroy, 2786 2787 .listen = nvmf_tcp_listen, 2788 .stop_listen = nvmf_tcp_stop_listen, 2789 .accept = nvmf_tcp_accept, 2790 2791 .listener_discover = nvmf_tcp_discover, 2792 2793 .poll_group_create = nvmf_tcp_poll_group_create, 2794 .get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group, 2795 .poll_group_destroy = nvmf_tcp_poll_group_destroy, 2796 .poll_group_add = nvmf_tcp_poll_group_add, 2797 .poll_group_remove = nvmf_tcp_poll_group_remove, 2798 .poll_group_poll = nvmf_tcp_poll_group_poll, 2799 2800 .req_free = nvmf_tcp_req_free, 2801 .req_complete = nvmf_tcp_req_complete, 2802 2803 .qpair_fini = nvmf_tcp_close_qpair, 2804 .qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid, 2805 .qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid, 2806 .qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid, 2807 .qpair_abort_request = nvmf_tcp_qpair_abort_request, 2808 }; 2809 2810 SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp); 2811 SPDK_LOG_REGISTER_COMPONENT(nvmf_tcp) 2812