/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/accel.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/endian.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/nvmf_transport.h"
#include "spdk/string.h"
#include "spdk/trace.h"
#include "spdk/util.h"
#include "spdk/log.h"
#include "spdk/keyring.h"

#include "spdk_internal/assert.h"
#include "spdk_internal/nvme_tcp.h"
#include "spdk_internal/sock.h"

#include "nvmf_internal.h"
#include "transport.h"

#include "spdk_internal/trace_defs.h"

#define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
#define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 16
#define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
#define SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM 32
#define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true

#define SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH 2
#define SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH 65535
#define SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH 2
#define SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH 4096

#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH 128
#define SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH 128
#define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
#define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
#define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
#define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
#define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE UINT32_MAX
#define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
#define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1

const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp;
static bool g_tls_log = false;

/* States for the NVMe-oF TCP request state machine */
enum spdk_nvmf_tcp_req_state {

	/* The request is not currently in use */
	TCP_REQUEST_STATE_FREE = 0,

	/* Initial state when request first received */
	TCP_REQUEST_STATE_NEW = 1,

	/* The request is queued until a data buffer is available. */
	TCP_REQUEST_STATE_NEED_BUFFER = 2,

	/* The request has the data buffer available */
	TCP_REQUEST_STATE_HAVE_BUFFER = 3,

	/* The request is waiting for zcopy_start to finish */
	TCP_REQUEST_STATE_AWAITING_ZCOPY_START = 4,

	/* The request has received a zero-copy buffer */
	TCP_REQUEST_STATE_ZCOPY_START_COMPLETED = 5,

	/* The request is currently transferring data from the host to the controller. */
	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER = 6,

	/* The request is waiting for the R2T send acknowledgement. */
	TCP_REQUEST_STATE_AWAITING_R2T_ACK = 7,

	/* The request is ready to execute at the block device */
	TCP_REQUEST_STATE_READY_TO_EXECUTE = 8,

	/* The request is currently executing at the block device */
	TCP_REQUEST_STATE_EXECUTING = 9,

	/* The request is waiting for zcopy buffers to be committed */
	TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT = 10,

	/* The request finished executing at the block device */
	TCP_REQUEST_STATE_EXECUTED = 11,

	/* The request is ready to send a completion */
	TCP_REQUEST_STATE_READY_TO_COMPLETE = 12,

	/* The request is currently transferring final pdus from the controller to the host. */
	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST = 13,

	/* The request is waiting for zcopy buffers to be released (without committing) */
	TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE = 14,

	/* The request completed and can be marked free. */
	TCP_REQUEST_STATE_COMPLETED = 15,

	/* Terminator */
	TCP_REQUEST_NUM_STATES,
};
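
/*
 * Typical request lifecycle (editor's summary of the states above): a request
 * leaves FREE when a capsule arrives (NEW), waits for data buffers
 * (NEED_BUFFER/HAVE_BUFFER), optionally goes through the zero-copy
 * start/commit/release steps, transfers host data (possibly via an R2T round
 * trip), executes at the block device, transfers its completion back to the
 * host and finally returns to FREE via COMPLETED. The exact transitions are
 * driven by nvmf_tcp_req_process().
 */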

enum nvmf_tcp_qpair_state {
	NVMF_TCP_QPAIR_STATE_INVALID = 0,
	NVMF_TCP_QPAIR_STATE_INITIALIZING = 1,
	NVMF_TCP_QPAIR_STATE_RUNNING = 2,
	NVMF_TCP_QPAIR_STATE_EXITING = 3,
	NVMF_TCP_QPAIR_STATE_EXITED = 4,
};

static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
	"Invalid PDU Header Field",
	"PDU Sequence Error",
	"Header Digest Error",
	"Data Transfer Out of Range",
	"R2T Limit Exceeded",
	"Unsupported parameter",
};
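
/*
 * Tracepoint registration for the nvmf_tcp trace group. Each description below
 * names a request or qpair state so transitions recorded with spdk_trace_record()
 * can be decoded by the SPDK trace tooling.
 */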

static void
nvmf_tcp_trace(void)
{
	spdk_trace_register_owner_type(OWNER_TYPE_NVMF_TCP, 't');
	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
	spdk_trace_register_description("TCP_REQ_NEW",
					TRACE_TCP_REQUEST_STATE_NEW,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 1,
					SPDK_TRACE_ARG_TYPE_INT, "qd");
	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_HAVE_BUFFER",
					TRACE_TCP_REQUEST_STATE_HAVE_BUFFER,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_START",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_ZCPY_START_CPL",
					TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_EXECUTING",
					TRACE_TCP_REQUEST_STATE_EXECUTING,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_CMT",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_EXECUTED",
					TRACE_TCP_REQUEST_STATE_EXECUTED,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_AWAIT_ZCPY_RLS",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_COMPLETED",
					TRACE_TCP_REQUEST_STATE_COMPLETED,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "qd");
	spdk_trace_register_description("TCP_READ_DONE",
					TRACE_TCP_READ_FROM_SOCKET_DONE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
					TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK,
					OWNER_TYPE_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");

	spdk_trace_register_description("TCP_QP_CREATE", TRACE_TCP_QP_CREATE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_QP_SOCK_INIT", TRACE_TCP_QP_SOCK_INIT,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_QP_STATE_CHANGE", TRACE_TCP_QP_STATE_CHANGE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "state");
	spdk_trace_register_description("TCP_QP_DISCONNECT", TRACE_TCP_QP_DISCONNECT,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_QP_DESTROY", TRACE_TCP_QP_DESTROY,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_QP_ABORT_REQ", TRACE_TCP_QP_ABORT_REQ,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "");
	spdk_trace_register_description("TCP_QP_RCV_STATE_CHANGE", TRACE_TCP_QP_RCV_STATE_CHANGE,
					OWNER_TYPE_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "state");

	spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_NVMF_TCP_IO, 1);
	spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_NVMF_TCP_IO, 0);
	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_QUEUE, OBJECT_NVMF_TCP_IO, 0);
	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_PEND, OBJECT_NVMF_TCP_IO, 0);
	spdk_trace_tpoint_register_relation(TRACE_SOCK_REQ_COMPLETE, OBJECT_NVMF_TCP_IO, 0);
}
SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)

struct spdk_nvmf_tcp_req {
	struct spdk_nvmf_request req;
	struct spdk_nvme_cpl rsp;
	struct spdk_nvme_cmd cmd;

	/* A PDU that can be used for sending responses. This is
	 * not the incoming PDU! */
	struct nvme_tcp_pdu *pdu;

	/* In-capsule data buffer */
	uint8_t *buf;

	struct spdk_nvmf_tcp_req *fused_pair;

	/*
	 * The PDU for a request may be used multiple times in serial over
	 * the request's lifetime. For example, first to send an R2T, then
	 * to send a completion. To catch mistakes where the PDU is used
	 * twice at the same time, add a debug flag here for init/fini.
	 */
	bool pdu_in_use;
	bool has_in_capsule_data;
	bool fused_failed;

	/* transfer_tag */
	uint16_t ttag;

	enum spdk_nvmf_tcp_req_state state;

	/*
	 * h2c_offset is used when we receive the h2c_data PDU.
	 */
	uint32_t h2c_offset;

	STAILQ_ENTRY(spdk_nvmf_tcp_req) link;
	TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link;
	STAILQ_ENTRY(spdk_nvmf_tcp_req) control_msg_link;
};
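
/*
 * Per-connection state. Each qpair owns its request objects, PDUs and
 * in-capsule data buffers ('resource_count' of each, sized from the transport's
 * max_queue_depth), plus a spare management PDU; see
 * nvmf_tcp_qpair_init_mem_resource() for the allocation layout.
 */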

struct spdk_nvmf_tcp_qpair {
	struct spdk_nvmf_qpair qpair;
	struct spdk_nvmf_tcp_poll_group *group;
	struct spdk_sock *sock;

	enum nvme_tcp_pdu_recv_state recv_state;
	enum nvmf_tcp_qpair_state state;

	/* PDU being actively received */
	struct nvme_tcp_pdu *pdu_in_progress;

	struct spdk_nvmf_tcp_req *fused_first;

	/* Queues to track the requests in all states */
	TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_working_queue;
	TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_free_queue;
	SLIST_HEAD(, nvme_tcp_pdu) tcp_pdu_free_queue;
	/* Number of working pdus */
	uint32_t tcp_pdu_working_count;

	/* Number of requests in each state */
	uint32_t state_cntr[TCP_REQUEST_NUM_STATES];

	uint8_t cpda;

	bool host_hdgst_enable;
	bool host_ddgst_enable;

	/* This is a spare PDU used for sending special management
	 * operations. Primarily, this is used for the initial
	 * connection response and c2h termination request. */
	struct nvme_tcp_pdu *mgmt_pdu;

	/* Arrays of in-capsule buffers, requests, and pdus.
	 * Each array is 'resource_count' number of elements */
	void *bufs;
	struct spdk_nvmf_tcp_req *reqs;
	struct nvme_tcp_pdu *pdus;
	uint32_t resource_count;
	uint32_t recv_buf_size;

	struct spdk_nvmf_tcp_port *port;

	/* IP address */
	char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
	char target_addr[SPDK_NVMF_TRADDR_MAX_LEN];

	/* IP port */
	uint16_t initiator_port;
	uint16_t target_port;

	/* Wait until the host terminates the connection (e.g. after sending C2HTermReq) */
	bool wait_terminate;

	/* Timer used to destroy the qpair after a transport error if the initiator does
	 * not close the connection.
	 */
	struct spdk_poller *timeout_poller;

	spdk_nvmf_transport_qpair_fini_cb fini_cb_fn;
	void *fini_cb_arg;

	TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link;
	bool pending_flush;
};
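
/*
 * Control messages are small, fixed-size buffers (SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE
 * each) that stand in for in-capsule data when the configured ICD is too small
 * for admin/fabrics commands; see nvmf_tcp_poll_group_create().
 */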

struct spdk_nvmf_tcp_control_msg {
	STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link;
};

struct spdk_nvmf_tcp_control_msg_list {
	void *msg_buf;
	STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs;
	STAILQ_HEAD(, spdk_nvmf_tcp_req) waiting_for_msg_reqs;
};

struct spdk_nvmf_tcp_poll_group {
	struct spdk_nvmf_transport_poll_group group;
	struct spdk_sock_group *sock_group;

	TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs;
	TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req;

	struct spdk_io_channel *accel_channel;
	struct spdk_nvmf_tcp_control_msg_list *control_msg_list;

	TAILQ_ENTRY(spdk_nvmf_tcp_poll_group) link;
};

struct spdk_nvmf_tcp_port {
	const struct spdk_nvme_transport_id *trid;
	struct spdk_sock *listen_sock;
	struct spdk_nvmf_transport *transport;
	TAILQ_ENTRY(spdk_nvmf_tcp_port) link;
};

struct tcp_transport_opts {
	bool c2h_success;
	uint16_t control_msg_num;
	uint32_t sock_priority;
};

struct tcp_psk_entry {
	char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	char pskid[NVMF_PSK_IDENTITY_LEN];
	uint8_t psk[SPDK_TLS_PSK_MAX_LEN];
	struct spdk_key *key;
	uint32_t psk_size;
	enum nvme_tcp_cipher_suite tls_cipher_suite;
	TAILQ_ENTRY(tcp_psk_entry) link;
};

struct spdk_nvmf_tcp_transport {
	struct spdk_nvmf_transport transport;
	struct tcp_transport_opts tcp_opts;
	uint32_t ack_timeout;

	struct spdk_nvmf_tcp_poll_group *next_pg;

	struct spdk_poller *accept_poller;
	struct spdk_sock_group *listen_sock_group;

	TAILQ_HEAD(, spdk_nvmf_tcp_port) ports;
	TAILQ_HEAD(, spdk_nvmf_tcp_poll_group) poll_groups;

	TAILQ_HEAD(, tcp_psk_entry) psks;
};

static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = {
	{
		"c2h_success", offsetof(struct tcp_transport_opts, c2h_success),
		spdk_json_decode_bool, true
	},
	{
		"control_msg_num", offsetof(struct tcp_transport_opts, control_msg_num),
		spdk_json_decode_uint16, true
	},
	{
		"sock_priority", offsetof(struct tcp_transport_opts, sock_priority),
		spdk_json_decode_uint32, true
	},
};
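
/*
 * Illustrative (hypothetical) transport_specific JSON fragment accepted by the
 * decoder above, e.g. as part of the nvmf_create_transport RPC parameters:
 *
 *   { "c2h_success": true, "control_msg_num": 32, "sock_priority": 0 }
 *
 * All three keys are optional; unknown keys are tolerated because the object is
 * decoded with spdk_json_decode_object_relaxed() in nvmf_tcp_create().
 */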

static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
				 struct spdk_nvmf_tcp_req *tcp_req);
static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);

static void _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
				    struct spdk_nvmf_tcp_req *tcp_req);

static inline void
nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
		       enum spdk_nvmf_tcp_req_state state)
{
	struct spdk_nvmf_qpair *qpair;
	struct spdk_nvmf_tcp_qpair *tqpair;

	qpair = tcp_req->req.qpair;
	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	assert(tqpair->state_cntr[tcp_req->state] > 0);
	tqpair->state_cntr[tcp_req->state]--;
	tqpair->state_cntr[state]++;

	tcp_req->state = state;
}

static inline struct nvme_tcp_pdu *
nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req)
{
	assert(tcp_req->pdu_in_use == false);

	memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu));
	tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);

	return tcp_req->pdu;
}

static struct spdk_nvmf_tcp_req *
nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
{
	struct spdk_nvmf_tcp_req *tcp_req;

	tcp_req = TAILQ_FIRST(&tqpair->tcp_req_free_queue);
	if (spdk_unlikely(!tcp_req)) {
		return NULL;
	}

	memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
	tcp_req->h2c_offset = 0;
	tcp_req->has_in_capsule_data = false;
	tcp_req->req.dif_enabled = false;
	tcp_req->req.zcopy_phase = NVMF_ZCOPY_PHASE_NONE;

	TAILQ_REMOVE(&tqpair->tcp_req_free_queue, tcp_req, state_link);
	TAILQ_INSERT_TAIL(&tqpair->tcp_req_working_queue, tcp_req, state_link);
	tqpair->qpair.queue_depth++;
	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
	return tcp_req;
}

static inline void
nvmf_tcp_req_put(struct spdk_nvmf_tcp_qpair *tqpair, struct spdk_nvmf_tcp_req *tcp_req)
{
	assert(!tcp_req->pdu_in_use);

	TAILQ_REMOVE(&tqpair->tcp_req_working_queue, tcp_req, state_link);
	TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link);
	tqpair->qpair.queue_depth--;
	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
}

static void
nvmf_tcp_req_get_buffers_done(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_tcp_transport *ttransport;

	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
	transport = req->qpair->transport;
	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER);
	nvmf_tcp_req_process(ttransport, tcp_req);
}

static void
nvmf_tcp_request_free(void *cb_arg)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_req *tcp_req = cb_arg;

	assert(tcp_req != NULL);

	SPDK_DEBUGLOG(nvmf_tcp, "tcp_req=%p will be freed\n", tcp_req);
	ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
				      struct spdk_nvmf_tcp_transport, transport);
	nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
	nvmf_tcp_req_process(ttransport, tcp_req);
}

static int
nvmf_tcp_req_free(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);

	nvmf_tcp_request_free(tcp_req);

	return 0;
}

static void
nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
			   enum spdk_nvmf_tcp_req_state state)
{
	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;

	assert(state != TCP_REQUEST_STATE_FREE);
	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) {
		if (state == tcp_req->state) {
			nvmf_tcp_request_free(tcp_req);
		}
	}
}
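
/*
 * Abort a request that is waiting for buffers: it may be queued either on the
 * poll group's control message wait list or in the iobuf layer, so check the
 * control message wait list first and fall back to nvmf_request_get_buffers_abort().
 */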

static inline void
nvmf_tcp_request_get_buffers_abort(struct spdk_nvmf_tcp_req *tcp_req)
{
	/* Request can wait either for the iobuf or control_msg */
	struct spdk_nvmf_poll_group *group = tcp_req->req.qpair->group;
	struct spdk_nvmf_transport *transport = tcp_req->req.qpair->transport;
	struct spdk_nvmf_transport_poll_group *tgroup = nvmf_get_transport_poll_group(group, transport);
	struct spdk_nvmf_tcp_poll_group *tcp_group = SPDK_CONTAINEROF(tgroup,
			struct spdk_nvmf_tcp_poll_group, group);
	struct spdk_nvmf_tcp_req *tmp_req, *abort_req;

	assert(tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER);

	STAILQ_FOREACH_SAFE(abort_req, &tcp_group->control_msg_list->waiting_for_msg_reqs, control_msg_link,
			    tmp_req) {
		if (abort_req == tcp_req) {
			STAILQ_REMOVE(&tcp_group->control_msg_list->waiting_for_msg_reqs, abort_req, spdk_nvmf_tcp_req,
				      control_msg_link);
			return;
		}
	}

	if (!nvmf_request_get_buffers_abort(&tcp_req->req)) {
		SPDK_ERRLOG("Failed to abort tcp_req=%p\n", tcp_req);
		assert(0 && "Should never happen");
	}
}

static void
nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
{
	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;

	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);

	/* Wipe the requests waiting for buffer from the waiting list */
	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) {
		if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) {
			nvmf_tcp_request_get_buffers_abort(tcp_req);
		}
	}

	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
}

static void
nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
{
	int i;
	struct spdk_nvmf_tcp_req *tcp_req;

	SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
	for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
		SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]);
		TAILQ_FOREACH(tcp_req, &tqpair->tcp_req_working_queue, state_link) {
			if ((int)tcp_req->state == i) {
				SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
				SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
			}
		}
	}
}

static void
_nvmf_tcp_qpair_destroy(void *_tqpair)
{
	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
	spdk_nvmf_transport_qpair_fini_cb cb_fn = tqpair->fini_cb_fn;
	void *cb_arg = tqpair->fini_cb_arg;
	int err = 0;

	spdk_trace_record(TRACE_TCP_QP_DESTROY, tqpair->qpair.trace_id, 0, 0);

	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");

	err = spdk_sock_close(&tqpair->sock);
	assert(err == 0);
	nvmf_tcp_cleanup_all_states(tqpair);

	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) {
		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
			    tqpair->resource_count);
		err++;
	}

	if (err > 0) {
		nvmf_tcp_dump_qpair_req_contents(tqpair);
	}

	/* The timeout poller might still be registered here if we close the qpair before host
	 * terminates the connection.
	 */
	spdk_poller_unregister(&tqpair->timeout_poller);
	spdk_dma_free(tqpair->pdus);
	free(tqpair->reqs);
	spdk_free(tqpair->bufs);
	spdk_trace_unregister_owner(tqpair->qpair.trace_id);
	free(tqpair);

	if (cb_fn != NULL) {
		cb_fn(cb_arg);
	}

	SPDK_DEBUGLOG(nvmf_tcp, "Leave\n");
}

static void
nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
{
	/* Delay the destruction to make sure it isn't performed from the context of a sock
	 * callback. Otherwise, spdk_sock_close() might not abort pending requests, causing their
	 * completions to be executed after the qpair is freed. (Note: this fixed issue #2471.)
	 */
	spdk_thread_send_msg(spdk_get_thread(), _nvmf_tcp_qpair_destroy, tqpair);
}

static void
nvmf_tcp_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w)
{
	struct spdk_nvmf_tcp_transport *ttransport;

	assert(w != NULL);

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	spdk_json_write_named_bool(w, "c2h_success", ttransport->tcp_opts.c2h_success);
	spdk_json_write_named_uint32(w, "sock_priority", ttransport->tcp_opts.sock_priority);
}

static void
nvmf_tcp_free_psk_entry(struct tcp_psk_entry *entry)
{
	if (entry == NULL) {
		return;
	}

	spdk_memset_s(entry->psk, sizeof(entry->psk), 0, sizeof(entry->psk));
	spdk_keyring_put_key(entry->key);
	free(entry);
}

static int
nvmf_tcp_destroy(struct spdk_nvmf_transport *transport,
		 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct tcp_psk_entry *entry, *tmp;

	assert(transport != NULL);
	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	TAILQ_FOREACH_SAFE(entry, &ttransport->psks, link, tmp) {
		TAILQ_REMOVE(&ttransport->psks, entry, link);
		nvmf_tcp_free_psk_entry(entry);
	}

	spdk_poller_unregister(&ttransport->accept_poller);
	spdk_sock_group_unregister_interrupt(ttransport->listen_sock_group);
	spdk_sock_group_close(&ttransport->listen_sock_group);
	free(ttransport);

	if (cb_fn) {
		cb_fn(cb_arg);
	}
	return 0;
}

static int nvmf_tcp_accept(void *ctx);

static void nvmf_tcp_accept_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock);
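
/*
 * Create the TCP transport. Transport-specific options are decoded from
 * opts->transport_specific, and generic options that are out of range for TCP
 * (io_unit_size, in_capsule_data_size, queue depths) are clamped to supported
 * values with a warning rather than failing the call.
 */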

static struct spdk_nvmf_transport *
nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	uint32_t sge_count;
	uint32_t min_shared_buffers;
	int rc;
	uint64_t period;

	ttransport = calloc(1, sizeof(*ttransport));
	if (!ttransport) {
		return NULL;
	}

	TAILQ_INIT(&ttransport->ports);
	TAILQ_INIT(&ttransport->poll_groups);
	TAILQ_INIT(&ttransport->psks);

	ttransport->transport.ops = &spdk_nvmf_transport_tcp;

	ttransport->tcp_opts.c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
	ttransport->tcp_opts.sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
	ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
	if (opts->transport_specific != NULL &&
	    spdk_json_decode_object_relaxed(opts->transport_specific, tcp_transport_opts_decoder,
					    SPDK_COUNTOF(tcp_transport_opts_decoder),
					    &ttransport->tcp_opts)) {
		SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n");
		free(ttransport);
		return NULL;
	}

	SPDK_NOTICELOG("*** TCP Transport Init ***\n");

	SPDK_INFOLOG(nvmf_tcp, "*** TCP Transport Init ***\n"
		     " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
		     " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
		     " in_capsule_data_size=%d, max_aq_depth=%d\n"
		     " num_shared_buffers=%d, c2h_success=%d,\n"
		     " dif_insert_or_strip=%d, sock_priority=%d\n"
		     " abort_timeout_sec=%d, control_msg_num=%hu\n"
		     " ack_timeout=%d\n",
		     opts->max_queue_depth,
		     opts->max_io_size,
		     opts->max_qpairs_per_ctrlr - 1,
		     opts->io_unit_size,
		     opts->in_capsule_data_size,
		     opts->max_aq_depth,
		     opts->num_shared_buffers,
		     ttransport->tcp_opts.c2h_success,
		     opts->dif_insert_or_strip,
		     ttransport->tcp_opts.sock_priority,
		     opts->abort_timeout_sec,
		     ttransport->tcp_opts.control_msg_num,
		     opts->ack_timeout);

	if (ttransport->tcp_opts.sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
		SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n"
			    "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n",
			    ttransport->tcp_opts.sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
		free(ttransport);
		return NULL;
	}

	if (ttransport->tcp_opts.control_msg_num == 0 &&
	    opts->in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		SPDK_WARNLOG("TCP param control_msg_num can't be 0 if ICD is less than %u bytes. Using default value %u\n",
			     SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM);
		ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
	}

	/* I/O unit size cannot be larger than max I/O size */
	if (opts->io_unit_size > opts->max_io_size) {
		SPDK_WARNLOG("TCP param io_unit_size %u can't be larger than max_io_size %u. Using max_io_size as io_unit_size\n",
			     opts->io_unit_size, opts->max_io_size);
		opts->io_unit_size = opts->max_io_size;
	}

	/* In capsule data size cannot be larger than max I/O size */
	if (opts->in_capsule_data_size > opts->max_io_size) {
		SPDK_WARNLOG("TCP param ICD size %u can't be larger than max_io_size %u. Using max_io_size as ICD size\n",
			     opts->in_capsule_data_size, opts->max_io_size);
		opts->in_capsule_data_size = opts->max_io_size;
	}

	/* max IO queue depth cannot be smaller than 2 or larger than 65535.
	 * We will not check SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH, because max_queue_depth is 16bits and always not larger than 64k. */
	if (opts->max_queue_depth < SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH) {
		SPDK_WARNLOG("TCP param max_queue_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
			     opts->max_queue_depth, SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH,
			     SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH);
		opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
	}

	/* max admin queue depth cannot be smaller than 2 or larger than 4096 */
	if (opts->max_aq_depth < SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH ||
	    opts->max_aq_depth > SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH) {
		SPDK_WARNLOG("TCP param max_aq_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
			     opts->max_aq_depth, SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH,
			     SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH);
		opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
	}

	sge_count = opts->max_io_size / opts->io_unit_size;
	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
		free(ttransport);
		return NULL;
	}

	/* If buf_cache_size == UINT32_MAX, we will dynamically pick a cache size later that we know will fit. */
	if (opts->buf_cache_size < UINT32_MAX) {
		min_shared_buffers = spdk_env_get_core_count() * opts->buf_cache_size;
		if (min_shared_buffers > opts->num_shared_buffers) {
			SPDK_ERRLOG("There are not enough buffers to satisfy "
				    "per-poll group caches for each thread. (%" PRIu32 ") "
				    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
			SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
			free(ttransport);
			return NULL;
		}
	}

	period = spdk_interrupt_mode_is_enabled() ? 0 : opts->acceptor_poll_rate;
	ttransport->accept_poller = SPDK_POLLER_REGISTER(nvmf_tcp_accept, &ttransport->transport, period);
	if (!ttransport->accept_poller) {
		free(ttransport);
		return NULL;
	}

	spdk_poller_register_interrupt(ttransport->accept_poller, NULL, NULL);

	ttransport->listen_sock_group = spdk_sock_group_create(NULL);
	if (ttransport->listen_sock_group == NULL) {
		SPDK_ERRLOG("Failed to create socket group for listen sockets\n");
		spdk_poller_unregister(&ttransport->accept_poller);
		free(ttransport);
		return NULL;
	}

	if (spdk_interrupt_mode_is_enabled()) {
		rc = SPDK_SOCK_GROUP_REGISTER_INTERRUPT(ttransport->listen_sock_group,
							SPDK_INTERRUPT_EVENT_IN | SPDK_INTERRUPT_EVENT_OUT, nvmf_tcp_accept, &ttransport->transport);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to register interrupt for the listen socket group\n");
			spdk_sock_group_close(&ttransport->listen_sock_group);
			spdk_poller_unregister(&ttransport->accept_poller);
			free(ttransport);
			return NULL;
		}
	}

	return &ttransport->transport;
}
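
/*
 * Parse a trsvcid string as a TCP port. Returns the port on success or -1 on
 * failure; e.g. "4420" yields 4420, while "0", "65536" and "4420x" are rejected.
 */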

static int
nvmf_tcp_trsvcid_to_int(const char *trsvcid)
{
	unsigned long long ull;
	char *end = NULL;

	ull = strtoull(trsvcid, &end, 10);
	if (end == NULL || end == trsvcid || *end != '\0') {
		return -1;
	}

	/* Valid TCP/IP port numbers are in [1, 65535] */
	if (ull == 0 || ull > 65535) {
		return -1;
	}

	return (int)ull;
}

/**
 * Canonicalize a listen address trid.
 */
static int
nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
			   const struct spdk_nvme_transport_id *trid)
{
	int trsvcid_int;

	trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
	if (trsvcid_int < 0) {
		return -EINVAL;
	}

	memset(canon_trid, 0, sizeof(*canon_trid));
	spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP);
	canon_trid->adrfam = trid->adrfam;
	snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
	snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);

	return 0;
}

/**
 * Find an existing listening port.
 */
static struct spdk_nvmf_tcp_port *
nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
		   const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_transport_id canon_trid;
	struct spdk_nvmf_tcp_port *port;

	if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
		return NULL;
	}

	TAILQ_FOREACH(port, &ttransport->ports, link) {
		if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) {
			return port;
		}
	}

	return NULL;
}
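
/*
 * spdk_sock "ssl" implementation callback: look up the pre-shared key configured
 * for the given PSK identity, derive the TLS PSK for the negotiated cipher suite
 * and report the cipher suite string back to the socket layer.
 */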

static int
tcp_sock_get_key(uint8_t *out, int out_len, const char **cipher, const char *pskid,
		 void *get_key_ctx)
{
	struct tcp_psk_entry *entry;
	struct spdk_nvmf_tcp_transport *ttransport = get_key_ctx;
	size_t psk_len;
	int rc;

	TAILQ_FOREACH(entry, &ttransport->psks, link) {
		if (strcmp(pskid, entry->pskid) != 0) {
			continue;
		}

		psk_len = entry->psk_size;
		if ((size_t)out_len < psk_len) {
			SPDK_ERRLOG("Out buffer of size: %" PRIu32 " cannot fit PSK of len: %lu\n",
				    out_len, psk_len);
			return -ENOBUFS;
		}

		/* Convert PSK to the TLS PSK format. */
		rc = nvme_tcp_derive_tls_psk(entry->psk, psk_len, pskid, out, out_len,
					     entry->tls_cipher_suite);
		if (rc < 0) {
			SPDK_ERRLOG("Could not generate TLS PSK\n");
		}

		switch (entry->tls_cipher_suite) {
		case NVME_TCP_CIPHER_AES_128_GCM_SHA256:
			*cipher = "TLS_AES_128_GCM_SHA256";
			break;
		case NVME_TCP_CIPHER_AES_256_GCM_SHA384:
			*cipher = "TLS_AES_256_GCM_SHA384";
			break;
		default:
			*cipher = NULL;
			return -ENOTSUP;
		}

		return rc;
	}

	SPDK_ERRLOG("Could not find PSK for identity: %s\n", pskid);

	return -EINVAL;
}
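
/*
 * Start listening on a trid. When listen_opts requests a secure channel, the
 * "ssl" socket implementation is selected and wired up with tcp_sock_get_key()
 * so the TLS handshake can resolve PSK identities registered with this transport.
 */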

static int
nvmf_tcp_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid,
		struct spdk_nvmf_listen_opts *listen_opts)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_port *port;
	int trsvcid_int;
	uint8_t adrfam;
	const char *sock_impl_name;
	struct spdk_sock_impl_opts impl_opts;
	size_t impl_opts_size = sizeof(impl_opts);
	struct spdk_sock_opts opts;
	int rc;

	if (!strlen(trid->trsvcid)) {
		SPDK_ERRLOG("Service id is required\n");
		return -EINVAL;
	}

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
	if (trsvcid_int < 0) {
		SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
		return -EINVAL;
	}

	port = calloc(1, sizeof(*port));
	if (!port) {
		SPDK_ERRLOG("Port allocation failed\n");
		return -ENOMEM;
	}

	port->trid = trid;

	sock_impl_name = NULL;

	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ttransport->tcp_opts.sock_priority;
	opts.ack_timeout = transport->opts.ack_timeout;
	if (listen_opts->secure_channel) {
		if (listen_opts->sock_impl &&
		    strncmp("ssl", listen_opts->sock_impl, strlen(listen_opts->sock_impl))) {
			SPDK_ERRLOG("Enabling secure_channel while specifying a sock_impl different from 'ssl' is unsupported");
			free(port);
			return -EINVAL;
		}
		listen_opts->sock_impl = "ssl";
	}

	if (listen_opts->sock_impl) {
		sock_impl_name = listen_opts->sock_impl;
		spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);

		if (!strncmp("ssl", sock_impl_name, strlen(sock_impl_name))) {
			if (!g_tls_log) {
				SPDK_NOTICELOG("TLS support is considered experimental\n");
				g_tls_log = true;
			}
			impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
			impl_opts.get_key = tcp_sock_get_key;
			impl_opts.get_key_ctx = ttransport;
			impl_opts.tls_cipher_suites = "TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256";
		}

		opts.impl_opts = &impl_opts;
		opts.impl_opts_size = sizeof(impl_opts);
	}

	port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int,
			    sock_impl_name, &opts);
	if (port->listen_sock == NULL) {
		SPDK_ERRLOG("spdk_sock_listen_ext(%s, %d) failed: %s (%d)\n",
			    trid->traddr, trsvcid_int,
			    spdk_strerror(errno), errno);
		free(port);
		return -errno;
	}

	if (spdk_sock_is_ipv4(port->listen_sock)) {
		adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (spdk_sock_is_ipv6(port->listen_sock)) {
		adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else {
		SPDK_ERRLOG("Unhandled socket type\n");
		adrfam = 0;
	}

	if (adrfam != trid->adrfam) {
		SPDK_ERRLOG("Socket address family mismatch\n");
		spdk_sock_close(&port->listen_sock);
		free(port);
		return -EINVAL;
	}

	rc = spdk_sock_group_add_sock(ttransport->listen_sock_group, port->listen_sock, nvmf_tcp_accept_cb,
				      port);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to add socket to the listen socket group\n");
		spdk_sock_close(&port->listen_sock);
		free(port);
		return -errno;
	}

	port->transport = transport;

	SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n",
		       trid->traddr, trid->trsvcid);

	TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
	return 0;
}

static void
nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
		     const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_port *port;

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	SPDK_DEBUGLOG(nvmf_tcp, "Removing listen address %s port %s\n",
		      trid->traddr, trid->trsvcid);

	port = nvmf_tcp_find_port(ttransport, trid);
	if (port) {
		spdk_sock_group_remove_sock(ttransport->listen_sock_group, port->listen_sock);
		TAILQ_REMOVE(&ttransport->ports, port, link);
		spdk_sock_close(&port->listen_sock);
		free(port);
	}
}

static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
		enum nvme_tcp_pdu_recv_state state);

static void
nvmf_tcp_qpair_set_state(struct spdk_nvmf_tcp_qpair *tqpair, enum nvmf_tcp_qpair_state state)
{
	tqpair->state = state;
	spdk_trace_record(TRACE_TCP_QP_STATE_CHANGE, tqpair->qpair.trace_id, 0, 0,
			  (uint64_t)tqpair->state);
}

static void
nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair)
{
	SPDK_DEBUGLOG(nvmf_tcp, "Disconnecting qpair %p\n", tqpair);

	spdk_trace_record(TRACE_TCP_QP_DISCONNECT, tqpair->qpair.trace_id, 0, 0);

	if (tqpair->state <= NVMF_TCP_QPAIR_STATE_RUNNING) {
		nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_EXITING);
		assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
		spdk_poller_unregister(&tqpair->timeout_poller);

		/* This will end up calling nvmf_tcp_close_qpair */
		spdk_nvmf_qpair_disconnect(&tqpair->qpair);
	}
}

static void
_mgmt_pdu_write_done(void *_tqpair, int err)
{
	struct spdk_nvmf_tcp_qpair *tqpair = _tqpair;
	struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu;

	if (spdk_unlikely(err != 0)) {
		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}

static void
_req_pdu_write_done(void *req, int err)
{
	struct spdk_nvmf_tcp_req *tcp_req = req;
	struct nvme_tcp_pdu *pdu = tcp_req->pdu;
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;

	assert(tcp_req->pdu_in_use);
	tcp_req->pdu_in_use = false;

	/* If the request is in a completed state, we're waiting for write completion to free it */
	if (spdk_unlikely(tcp_req->state == TCP_REQUEST_STATE_COMPLETED)) {
		nvmf_tcp_request_free(tcp_req);
		return;
	}

	if (spdk_unlikely(err != 0)) {
		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}

static void
_pdu_write_done(struct nvme_tcp_pdu *pdu, int err)
{
	pdu->sock_req.cb_fn(pdu->sock_req.cb_arg, err);
}

static void
tcp_sock_flush_cb(void *arg)
{
	struct spdk_nvmf_tcp_qpair *tqpair = arg;
	int rc = spdk_sock_flush(tqpair->sock);

	if (rc < 0 && errno == EAGAIN) {
		spdk_thread_send_msg(spdk_get_thread(), tcp_sock_flush_cb, tqpair);
		return;
	}

	tqpair->pending_flush = false;
	if (rc < 0) {
		SPDK_ERRLOG("Could not write to socket: rc=%d, errno=%d\n", rc, errno);
	}
}

static void
_tcp_write_pdu(struct nvme_tcp_pdu *pdu)
{
	int rc;
	uint32_t mapped_length;
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;

	pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
			       tqpair->host_hdgst_enable, tqpair->host_ddgst_enable, &mapped_length);
	spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);

	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
	    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
		/* Try to force the send immediately. */
		rc = spdk_sock_flush(tqpair->sock);
		if (rc > 0 && (uint32_t)rc == mapped_length) {
			_pdu_write_done(pdu, 0);
		} else {
			SPDK_ERRLOG("Could not write %s to socket: rc=%d, errno=%d\n",
				    pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ?
				    "IC_RESP" : "TERM_REQ", rc, errno);
			_pdu_write_done(pdu, rc >= 0 ? -EAGAIN : -errno);
		}
	} else if (spdk_interrupt_mode_is_enabled()) {
		/* Async writes must be flushed */
		if (!tqpair->pending_flush) {
			tqpair->pending_flush = true;
			spdk_thread_send_msg(spdk_get_thread(), tcp_sock_flush_cb, tqpair);
		}
	}
}

static void
data_crc32_accel_done(void *cb_arg, int status)
{
	struct nvme_tcp_pdu *pdu = cb_arg;

	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Failed to compute the data digest for pdu=%p\n", pdu);
		_pdu_write_done(pdu, status);
		return;
	}

	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);

	_tcp_write_pdu(pdu);
}
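
/*
 * Compute the PDU data digest before writing. When the PDU qualifies (no DIF
 * context, digest-aligned length, poll group present), the CRC32C is offloaded
 * to the accel framework and data_crc32_accel_done() finishes the write;
 * otherwise it is computed synchronously in software. A failed offload
 * submission is reported through the same completion path.
 */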

static void
pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
	int rc = 0;

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
		/* Only support this limited case for the first step */
		if (spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)
				&& tqpair->group)) {
			rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
						       pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
			if (spdk_likely(rc == 0)) {
				return;
			}
		} else {
			pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
		}
		data_crc32_accel_done(pdu, rc);
	} else {
		_tcp_write_pdu(pdu);
	}
}

static void
nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	assert(tqpair->pdu_in_progress != pdu);

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;

	pdu->iov[0].iov_base = &pdu->hdr.raw;
	pdu->iov[0].iov_len = hlen;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	/* Data Digest */
	pdu_data_crc32_compute(pdu);
}

static void
nvmf_tcp_qpair_write_mgmt_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
			      void *cb_arg)
{
	struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu;

	pdu->sock_req.cb_fn = _mgmt_pdu_write_done;
	pdu->sock_req.cb_arg = tqpair;

	nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
}

static void
nvmf_tcp_qpair_write_req_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			     struct spdk_nvmf_tcp_req *tcp_req,
			     nvme_tcp_qpair_xfer_complete_cb cb_fn,
			     void *cb_arg)
{
	struct nvme_tcp_pdu *pdu = tcp_req->pdu;

	pdu->sock_req.cb_fn = _req_pdu_write_done;
	pdu->sock_req.cb_arg = tcp_req;

	assert(!tcp_req->pdu_in_use);
	tcp_req->pdu_in_use = true;

	nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
}
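
/*
 * Allocate the per-qpair resources. The pdus array holds 2 * resource_count + 1
 * entries: one response PDU per request, resource_count free PDUs for receiving,
 * and a final entry reserved for the management PDU.
 */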

static int
nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
{
	uint32_t i;
	struct spdk_nvmf_transport_opts *opts;
	uint32_t in_capsule_data_size;

	opts = &tqpair->qpair.transport->opts;

	in_capsule_data_size = opts->in_capsule_data_size;
	if (opts->dif_insert_or_strip) {
		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
	}

	tqpair->resource_count = opts->max_queue_depth;

	tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
	if (!tqpair->reqs) {
		SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
		return -1;
	}

	if (in_capsule_data_size) {
		tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000,
					    NULL, SPDK_ENV_LCORE_ID_ANY,
					    SPDK_MALLOC_DMA);
		if (!tqpair->bufs) {
			SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
			return -1;
		}
	}
	/* prepare memory space for receiving pdus and tcp_req */
	/* Add additional 1 member, which will be used for mgmt_pdu owned by the tqpair */
	tqpair->pdus = spdk_dma_zmalloc((2 * tqpair->resource_count + 1) * sizeof(*tqpair->pdus), 0x1000,
					NULL);
	if (!tqpair->pdus) {
		SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
		return -1;
	}

	for (i = 0; i < tqpair->resource_count; i++) {
		struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];

		tcp_req->ttag = i + 1;
		tcp_req->req.qpair = &tqpair->qpair;

		tcp_req->pdu = &tqpair->pdus[i];
		tcp_req->pdu->qpair = tqpair;

		/* Set up memory to receive commands */
		if (tqpair->bufs) {
			tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
		}

		/* Set the cmd and rsp */
		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;

		tcp_req->req.stripped_data = NULL;

		/* Initialize request state to FREE */
		tcp_req->state = TCP_REQUEST_STATE_FREE;
		TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link);
		tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
	}

	for (; i < 2 * tqpair->resource_count; i++) {
		struct nvme_tcp_pdu *pdu = &tqpair->pdus[i];

		pdu->qpair = tqpair;
		SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
	}

	tqpair->mgmt_pdu = &tqpair->pdus[i];
	tqpair->mgmt_pdu->qpair = tqpair;
	tqpair->pdu_in_progress = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
	SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);
	tqpair->tcp_pdu_working_count = 1;

	tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
				 SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;

	return 0;
}

static int
nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_tcp_qpair *tqpair;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	SPDK_DEBUGLOG(nvmf_tcp, "New TCP Connection: %p\n", qpair);

	spdk_trace_record(TRACE_TCP_QP_CREATE, tqpair->qpair.trace_id, 0, 0);

	/* Initialise request state queues of the qpair */
	TAILQ_INIT(&tqpair->tcp_req_free_queue);
	TAILQ_INIT(&tqpair->tcp_req_working_queue);
	SLIST_INIT(&tqpair->tcp_pdu_free_queue);
	tqpair->qpair.queue_depth = 0;

	tqpair->host_hdgst_enable = true;
	tqpair->host_ddgst_enable = true;

	return 0;
}

static int
nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
{
	char saddr[32], caddr[32];
	uint16_t sport, cport;
	char owner[256];
	int rc;

	rc = spdk_sock_getaddr(tqpair->sock, saddr, sizeof(saddr), &sport,
			       caddr, sizeof(caddr), &cport);
	if (rc != 0) {
		SPDK_ERRLOG("spdk_sock_getaddr() failed\n");
		return rc;
	}

	snprintf(owner, sizeof(owner), "%s:%d", caddr, cport);
	tqpair->qpair.trace_id = spdk_trace_register_owner(OWNER_TYPE_NVMF_TCP, owner);
	spdk_trace_record(TRACE_TCP_QP_SOCK_INIT, tqpair->qpair.trace_id, 0, 0);

	/* set low water mark */
	rc = spdk_sock_set_recvlowat(tqpair->sock, 1);
	if (rc != 0) {
		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
		return rc;
	}

	return 0;
}
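
/*
 * Accept path: wrap a newly accepted socket in a tqpair and hand it to the
 * target layer via spdk_nvmf_tgt_new_qpair(), which assigns the new qpair to a
 * poll group.
 */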

static void
nvmf_tcp_handle_connect(struct spdk_nvmf_tcp_port *port, struct spdk_sock *sock)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	int rc;

	SPDK_DEBUGLOG(nvmf_tcp, "New connection accepted on %s port %s\n",
		      port->trid->traddr, port->trid->trsvcid);

	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
	if (tqpair == NULL) {
		SPDK_ERRLOG("Could not allocate new connection.\n");
		spdk_sock_close(&sock);
		return;
	}

	tqpair->sock = sock;
	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
	tqpair->port = port;
	tqpair->qpair.transport = port->transport;
	tqpair->qpair.numa.id_valid = 1;
	tqpair->qpair.numa.id = spdk_sock_get_numa_id(sock);

	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
			       sizeof(tqpair->target_addr), &tqpair->target_port,
			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
			       &tqpair->initiator_port);
	if (rc < 0) {
		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
		nvmf_tcp_qpair_destroy(tqpair);
		return;
	}

	spdk_nvmf_tgt_new_qpair(port->transport->tgt, &tqpair->qpair);
}

static uint32_t
nvmf_tcp_port_accept(struct spdk_nvmf_tcp_port *port)
{
	struct spdk_sock *sock;
	uint32_t count = 0;
	int i;

	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
		sock = spdk_sock_accept(port->listen_sock);
		if (sock == NULL) {
			break;
		}
		count++;
		nvmf_tcp_handle_connect(port, sock);
	}

	return count;
}

static int
nvmf_tcp_accept(void *ctx)
{
	struct spdk_nvmf_transport *transport = ctx;
	struct spdk_nvmf_tcp_transport *ttransport;
	int count;

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	count = spdk_sock_group_poll(ttransport->listen_sock_group);
	if (count < 0) {
		SPDK_ERRLOG("Failed to poll the TCP listen socket group\n");
	}

	return count != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static void
nvmf_tcp_accept_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvmf_tcp_port *port = ctx;

	nvmf_tcp_port_accept(port);
}

static void
nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
		  struct spdk_nvme_transport_id *trid,
		  struct spdk_nvmf_discovery_log_page_entry *entry)
{
	struct spdk_nvmf_tcp_port *port;
	struct spdk_nvmf_tcp_transport *ttransport;

	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
	entry->adrfam = trid->adrfam;

	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	port = nvmf_tcp_find_port(ttransport, trid);

	assert(port != NULL);

	if (strcmp(spdk_sock_get_impl_name(port->listen_sock), "ssl") == 0) {
		entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED;
		entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_TLS_1_3;
	} else {
		entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
		entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
	}
}

static struct spdk_nvmf_tcp_control_msg_list *
nvmf_tcp_control_msg_list_create(uint16_t num_messages)
{
	struct spdk_nvmf_tcp_control_msg_list *list;
	struct spdk_nvmf_tcp_control_msg *msg;
	uint16_t i;

	list = calloc(1, sizeof(*list));
	if (!list) {
		SPDK_ERRLOG("Failed to allocate memory for list structure\n");
		return NULL;
	}

	list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE,
				     NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
	if (!list->msg_buf) {
		SPDK_ERRLOG("Failed to allocate memory for control message buffers\n");
		free(list);
		return NULL;
	}

	STAILQ_INIT(&list->free_msgs);
	STAILQ_INIT(&list->waiting_for_msg_reqs);

	for (i = 0; i < num_messages; i++) {
		msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i *
				SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		STAILQ_INSERT_TAIL(&list->free_msgs, msg, link);
	}

	return list;
}

static void
nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list)
{
	if (!list) {
		return;
	}

	spdk_free(list->msg_buf);
	free(list);
}

static int nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group);

static int
nvmf_tcp_poll_group_intr(void *ctx)
{
	struct spdk_nvmf_transport_poll_group *group = ctx;
	int ret = 0;

	ret = nvmf_tcp_poll_group_poll(group);

	return ret != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}
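
/*
 * Poll group setup: each group gets its own spdk_sock_group and accel channel,
 * and, when the transport's in-capsule data size is below
 * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, a control message list to service
 * admin/fabrics commands.
 */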

static struct spdk_nvmf_transport_poll_group *
nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport,
			   struct spdk_nvmf_poll_group *group)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_poll_group *tgroup;
	int rc;

	tgroup = calloc(1, sizeof(*tgroup));
	if (!tgroup) {
		return NULL;
	}

	tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
	if (!tgroup->sock_group) {
		goto cleanup;
	}

	TAILQ_INIT(&tgroup->qpairs);
	TAILQ_INIT(&tgroup->await_req);

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). "
			      "Creating control messages list\n", transport->opts.in_capsule_data_size,
			      SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(ttransport->tcp_opts.control_msg_num);
		if (!tgroup->control_msg_list) {
			goto cleanup;
		}
	}

	tgroup->accel_channel = spdk_accel_get_io_channel();
	if (spdk_unlikely(!tgroup->accel_channel)) {
		SPDK_ERRLOG("Cannot create accel_channel for tgroup=%p\n", tgroup);
		goto cleanup;
	}

	TAILQ_INSERT_TAIL(&ttransport->poll_groups, tgroup, link);
	if (ttransport->next_pg == NULL) {
		ttransport->next_pg = tgroup;
	}

	if (spdk_interrupt_mode_is_enabled()) {
		rc = SPDK_SOCK_GROUP_REGISTER_INTERRUPT(tgroup->sock_group,
							SPDK_INTERRUPT_EVENT_IN | SPDK_INTERRUPT_EVENT_OUT, nvmf_tcp_poll_group_intr, &tgroup->group);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to register interrupt for sock group\n");
			goto cleanup;
		}
	}

	return &tgroup->group;

cleanup:
	nvmf_tcp_poll_group_destroy(&tgroup->group);
	return NULL;
}
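
/*
 * Pick a poll group for a new qpair. The socket layer may suggest an optimal
 * group (e.g. based on placement hints from the socket implementation); if it
 * does not, fall back to round-robin over the transport's poll groups using
 * next_pg as the cursor.
 */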
*/ 1722 *pg = TAILQ_NEXT(*pg, link); 1723 if (*pg == NULL) { 1724 *pg = TAILQ_FIRST(&ttransport->poll_groups); 1725 } 1726 1727 return spdk_sock_group_get_ctx(hint); 1728 } 1729 1730 static void 1731 nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1732 { 1733 struct spdk_nvmf_tcp_poll_group *tgroup, *next_tgroup; 1734 struct spdk_nvmf_tcp_transport *ttransport; 1735 1736 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 1737 spdk_sock_group_unregister_interrupt(tgroup->sock_group); 1738 spdk_sock_group_close(&tgroup->sock_group); 1739 if (tgroup->control_msg_list) { 1740 nvmf_tcp_control_msg_list_free(tgroup->control_msg_list); 1741 } 1742 1743 if (tgroup->accel_channel) { 1744 spdk_put_io_channel(tgroup->accel_channel); 1745 } 1746 1747 if (tgroup->group.transport == NULL) { 1748 /* Transport can be NULL when nvmf_tcp_poll_group_create() 1749 * calls this function directly in a failure path. */ 1750 free(tgroup); 1751 return; 1752 } 1753 1754 ttransport = SPDK_CONTAINEROF(tgroup->group.transport, struct spdk_nvmf_tcp_transport, transport); 1755 1756 next_tgroup = TAILQ_NEXT(tgroup, link); 1757 TAILQ_REMOVE(&ttransport->poll_groups, tgroup, link); 1758 if (next_tgroup == NULL) { 1759 next_tgroup = TAILQ_FIRST(&ttransport->poll_groups); 1760 } 1761 if (ttransport->next_pg == tgroup) { 1762 ttransport->next_pg = next_tgroup; 1763 } 1764 1765 free(tgroup); 1766 } 1767 1768 static void 1769 nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 1770 enum nvme_tcp_pdu_recv_state state) 1771 { 1772 if (tqpair->recv_state == state) { 1773 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 1774 tqpair, state); 1775 return; 1776 } 1777 1778 if (spdk_unlikely(state == NVME_TCP_PDU_RECV_STATE_QUIESCING)) { 1779 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH && tqpair->pdu_in_progress) { 1780 SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, tqpair->pdu_in_progress, slist); 1781 tqpair->tcp_pdu_working_count--; 1782 } 1783 } 1784 1785 if (spdk_unlikely(state == NVME_TCP_PDU_RECV_STATE_ERROR)) { 1786 assert(tqpair->tcp_pdu_working_count == 0); 1787 } 1788 1789 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 1790 /* When leaving the await req state, move the qpair to the main list */ 1791 TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link); 1792 TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link); 1793 } else if (state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 1794 TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link); 1795 TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link); 1796 } 1797 1798 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv state=%d\n", tqpair, state); 1799 tqpair->recv_state = state; 1800 1801 spdk_trace_record(TRACE_TCP_QP_RCV_STATE_CHANGE, tqpair->qpair.trace_id, 0, 0, 1802 (uint64_t)tqpair->recv_state); 1803 } 1804 1805 static int 1806 nvmf_tcp_qpair_handle_timeout(void *ctx) 1807 { 1808 struct spdk_nvmf_tcp_qpair *tqpair = ctx; 1809 1810 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 1811 1812 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, 1813 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); 1814 1815 nvmf_tcp_qpair_disconnect(tqpair); 1816 return SPDK_POLLER_BUSY; 1817 } 1818 1819 static void 1820 nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) 1821 { 1822 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; 1823 1824 if (!tqpair->timeout_poller) { 1825 tqpair->timeout_poller = 
SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair, 1826 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); 1827 } 1828 } 1829 1830 static void 1831 nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1832 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1833 { 1834 struct nvme_tcp_pdu *rsp_pdu; 1835 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; 1836 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); 1837 uint32_t copy_len; 1838 1839 rsp_pdu = tqpair->mgmt_pdu; 1840 1841 c2h_term_req = &rsp_pdu->hdr.term_req; 1842 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; 1843 c2h_term_req->common.hlen = c2h_term_req_hdr_len; 1844 c2h_term_req->fes = fes; 1845 1846 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1847 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1848 DSET32(&c2h_term_req->fei, error_offset); 1849 } 1850 1851 copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE); 1852 1853 /* Copy the error info into the buffer */ 1854 memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len); 1855 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len); 1856 1857 /* Contain the header of the wrong received pdu */ 1858 c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len; 1859 tqpair->wait_terminate = true; 1860 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1861 nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_c2h_term_req_complete, tqpair); 1862 } 1863 1864 static void 1865 nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1866 struct spdk_nvmf_tcp_qpair *tqpair, 1867 struct nvme_tcp_pdu *pdu) 1868 { 1869 struct spdk_nvmf_tcp_req *tcp_req; 1870 1871 assert(pdu->psh_valid_bytes == pdu->psh_len); 1872 assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD); 1873 1874 tcp_req = nvmf_tcp_req_get(tqpair); 1875 if (!tcp_req) { 1876 /* Directly return and make the allocation retry again. This can happen if we're 1877 * using asynchronous writes to send the response to the host or when releasing 1878 * zero-copy buffers after a response has been sent. In both cases, the host might 1879 * receive the response before we've finished processing the request and is free to 1880 * send another one. 1881 */ 1882 if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0 || 1883 tqpair->state_cntr[TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE] > 0) { 1884 return; 1885 } 1886 1887 /* The host sent more commands than the maximum queue depth. 
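		 * There are no free tcp_req slots and no responses are still being
		 * written back to the host, so this is a protocol violation by the
		 * host; quiesce the qpair and let the error path close the connection.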
		 */
		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
		return;
	}

	pdu->req = tcp_req;
	assert(tcp_req->state == TCP_REQUEST_STATE_NEW);
	nvmf_tcp_req_process(ttransport, tcp_req);
}

static void
nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
				    struct spdk_nvmf_tcp_qpair *tqpair,
				    struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	struct spdk_nvme_tcp_cmd *capsule_cmd;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	struct spdk_nvme_cpl *rsp;

	capsule_cmd = &pdu->hdr.capsule_cmd;
	tcp_req = pdu->req;
	assert(tcp_req != NULL);

	/* Zero-copy requests don't support ICD */
	assert(!spdk_nvmf_request_using_zcopy(&tcp_req->req));

	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
		goto err;
	}

	rsp = &tcp_req->req.rsp->nvme_cpl;
	if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) {
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
	} else {
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
	}

	nvmf_tcp_req_process(ttransport, tcp_req);

	return;
err:
	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
			     struct spdk_nvmf_tcp_qpair *tqpair,
			     struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes = 0;
	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;

	h2c_data = &pdu->hdr.h2c_data;

	SPDK_DEBUGLOG(nvmf_tcp, "tqpair=%p, h2c_data info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);

	if (h2c_data->ttag > tqpair->resource_count) {
		SPDK_DEBUGLOG(nvmf_tcp, "ttag %u is larger than allowed %u.\n", h2c_data->ttag,
			      tqpair->resource_count);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
		goto err;
	}

	tcp_req = &tqpair->reqs[h2c_data->ttag - 1];

	if (spdk_unlikely(tcp_req->state != TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER &&
			  tcp_req->state != TCP_REQUEST_STATE_AWAITING_R2T_ACK)) {
		SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, is in unexpected state %d\n", tcp_req, tqpair,
			      tcp_req->state);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
		goto err;
	}

	if (spdk_unlikely(tcp_req->req.cmd->nvme_cmd.cid != h2c_data->cccid)) {
		SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, expected cccid %u but got %u.\n", tcp_req, tqpair,
			      tcp_req->req.cmd->nvme_cmd.cid, h2c_data->cccid);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
		goto err;
	}

	if (tcp_req->h2c_offset != h2c_data->datao)
{ 1981 SPDK_DEBUGLOG(nvmf_tcp, 1982 "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n", 1983 tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao); 1984 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1985 goto err; 1986 } 1987 1988 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { 1989 SPDK_DEBUGLOG(nvmf_tcp, 1990 "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n", 1991 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); 1992 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1993 goto err; 1994 } 1995 1996 pdu->req = tcp_req; 1997 1998 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 1999 pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 2000 } 2001 2002 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2003 h2c_data->datao, h2c_data->datal); 2004 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2005 return; 2006 2007 err: 2008 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2009 } 2010 2011 static void 2012 nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, 2013 struct spdk_nvmf_tcp_qpair *tqpair) 2014 { 2015 struct nvme_tcp_pdu *rsp_pdu; 2016 struct spdk_nvme_tcp_rsp *capsule_resp; 2017 2018 SPDK_DEBUGLOG(nvmf_tcp, "enter, tqpair=%p\n", tqpair); 2019 2020 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 2021 assert(rsp_pdu != NULL); 2022 2023 capsule_resp = &rsp_pdu->hdr.capsule_resp; 2024 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; 2025 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); 2026 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; 2027 if (tqpair->host_hdgst_enable) { 2028 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2029 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 2030 } 2031 2032 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_request_free, tcp_req); 2033 } 2034 2035 static void 2036 nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) 2037 { 2038 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 2039 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, 2040 struct spdk_nvmf_tcp_qpair, qpair); 2041 2042 assert(tqpair != NULL); 2043 2044 if (spdk_unlikely(tcp_req->pdu->rw_offset < tcp_req->req.length)) { 2045 SPDK_DEBUGLOG(nvmf_tcp, "sending another C2H part, offset %u length %u\n", tcp_req->pdu->rw_offset, 2046 tcp_req->req.length); 2047 _nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2048 return; 2049 } 2050 2051 if (tcp_req->pdu->hdr.c2h_data.common.flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 2052 nvmf_tcp_request_free(tcp_req); 2053 } else { 2054 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2055 } 2056 } 2057 2058 static void 2059 nvmf_tcp_r2t_complete(void *cb_arg) 2060 { 2061 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 2062 struct spdk_nvmf_tcp_transport *ttransport; 2063 2064 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 2065 struct spdk_nvmf_tcp_transport, transport); 2066 2067 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2068 2069 if (tcp_req->h2c_offset == tcp_req->req.length) { 2070 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2071 nvmf_tcp_req_process(ttransport, tcp_req); 2072 } 2073 } 2074 2075 static void 2076 nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 2077 struct spdk_nvmf_tcp_req *tcp_req) 2078 { 2079 struct nvme_tcp_pdu *rsp_pdu; 2080 struct spdk_nvme_tcp_r2t_hdr *r2t; 2081 2082 rsp_pdu = 
nvmf_tcp_req_pdu_init(tcp_req); 2083 assert(rsp_pdu != NULL); 2084 2085 r2t = &rsp_pdu->hdr.r2t; 2086 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; 2087 r2t->common.plen = r2t->common.hlen = sizeof(*r2t); 2088 2089 if (tqpair->host_hdgst_enable) { 2090 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2091 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 2092 } 2093 2094 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2095 r2t->ttag = tcp_req->ttag; 2096 r2t->r2to = tcp_req->h2c_offset; 2097 r2t->r2tl = tcp_req->req.length; 2098 2099 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 2100 2101 SPDK_DEBUGLOG(nvmf_tcp, 2102 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", 2103 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); 2104 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_r2t_complete, tcp_req); 2105 } 2106 2107 static void 2108 nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 2109 struct spdk_nvmf_tcp_qpair *tqpair, 2110 struct nvme_tcp_pdu *pdu) 2111 { 2112 struct spdk_nvmf_tcp_req *tcp_req; 2113 struct spdk_nvme_cpl *rsp; 2114 2115 tcp_req = pdu->req; 2116 assert(tcp_req != NULL); 2117 2118 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2119 2120 tcp_req->h2c_offset += pdu->data_len; 2121 2122 /* Wait for all of the data to arrive AND for the initial R2T PDU send to be 2123 * acknowledged before moving on. */ 2124 if (tcp_req->h2c_offset == tcp_req->req.length && 2125 tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) { 2126 /* After receiving all the h2c data, we need to check whether there is 2127 * transient transport error */ 2128 rsp = &tcp_req->req.rsp->nvme_cpl; 2129 if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) { 2130 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2131 } else { 2132 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2133 } 2134 nvmf_tcp_req_process(ttransport, tcp_req); 2135 } 2136 } 2137 2138 static void 2139 nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) 2140 { 2141 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, 2142 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); 2143 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 2144 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 2145 SPDK_DEBUGLOG(nvmf_tcp, "The offset from the start of the PDU header is %u\n", 2146 DGET32(h2c_term_req->fei)); 2147 } 2148 } 2149 2150 static void 2151 nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, 2152 struct nvme_tcp_pdu *pdu) 2153 { 2154 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 2155 uint32_t error_offset = 0; 2156 enum spdk_nvme_tcp_term_req_fes fes; 2157 2158 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 2159 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for h2c_term_req pdu=%p\n", pdu); 2160 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2161 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 2162 goto end; 2163 } 2164 2165 /* set the data buffer */ 2166 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, 2167 h2c_term_req->common.plen - h2c_term_req->common.hlen); 2168 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2169 return; 2170 end: 2171 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2172 } 
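/*
 * Worked example (illustrative only, the numbers are an assumption): if
 * nvmf_tcp_send_c2h_term_req() rejects a PDU whose common header reports
 * hlen = 24, then copy_len = spdk_min(24, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE)
 * and the term req is sent with plen = c2h_term_req_hdr_len + 24, i.e. the
 * fixed term req header immediately followed by the offending PDU header as
 * error data.
 */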

static void
nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
				     struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;

	nvmf_tcp_h2c_term_req_dump(h2c_term_req);
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
}

static void
_nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
			struct spdk_nvmf_tcp_transport, transport);

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
		nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
		nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
		nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
		break;

	default:
		/* Execution should never reach this point */
		SPDK_ERRLOG("ERROR pdu type %d\n", pdu->hdr.common.pdu_type);
		break;
	}
	SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
	tqpair->tcp_pdu_working_count--;
}

static inline void
nvmf_tcp_req_set_cpl(struct spdk_nvmf_tcp_req *treq, int sct, int sc)
{
	treq->req.rsp->nvme_cpl.status.sct = sct;
	treq->req.rsp->nvme_cpl.status.sc = sc;
	treq->req.rsp->nvme_cpl.cid = treq->req.cmd->nvme_cmd.cid;
}

static void
data_crc32_calc_done(void *cb_arg, int status)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;

	/* The async crc32 calculation failed; fall back to a direct calculation for the check */
	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Data digest on tqpair=(%p) with pdu=%p failed to be calculated asynchronously\n",
			    tqpair, pdu);
		pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
	}
	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	if (!MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32)) {
		SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
		assert(pdu->req != NULL);
		nvmf_tcp_req_set_cpl(pdu->req, SPDK_NVME_SCT_GENERIC,
				     SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR);
	}
	_nvmf_tcp_pdu_payload_handle(tqpair, pdu);
}

static void
nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	int rc = 0;
	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	tqpair->pdu_in_progress = NULL;
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	SPDK_DEBUGLOG(nvmf_tcp, "enter\n");
	/* check the data digest if needed */
	if (pdu->ddgst_enable) {
		if (tqpair->qpair.qid != 0 && !pdu->dif_ctx && tqpair->group &&
		    (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)) {
			rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
						       pdu->data_iovcnt, 0, data_crc32_calc_done, pdu);
			if (spdk_likely(rc == 0)) {
				return;
			}
		} else {
			pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
		}
		data_crc32_calc_done(pdu, rc);
	} else {
		_nvmf_tcp_pdu_payload_handle(tqpair, pdu);
	}
}

static void
nvmf_tcp_send_icresp_complete(void *cb_arg)
{
	struct spdk_nvmf_tcp_qpair
*tqpair = cb_arg; 2271 2272 nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_RUNNING); 2273 } 2274 2275 static void 2276 nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport, 2277 struct spdk_nvmf_tcp_qpair *tqpair, 2278 struct nvme_tcp_pdu *pdu) 2279 { 2280 struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; 2281 struct nvme_tcp_pdu *rsp_pdu; 2282 struct spdk_nvme_tcp_ic_resp *ic_resp; 2283 uint32_t error_offset = 0; 2284 enum spdk_nvme_tcp_term_req_fes fes; 2285 2286 /* Only PFV 0 is defined currently */ 2287 if (ic_req->pfv != 0) { 2288 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); 2289 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2290 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv); 2291 goto end; 2292 } 2293 2294 /* This value is 0’s based value in units of dwords should not be larger than SPDK_NVME_TCP_HPDA_MAX */ 2295 if (ic_req->hpda > SPDK_NVME_TCP_HPDA_MAX) { 2296 SPDK_ERRLOG("ICReq HPDA out of range 0 to 31, got %u\n", ic_req->hpda); 2297 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2298 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, hpda); 2299 goto end; 2300 } 2301 2302 /* MAXR2T is 0's based */ 2303 SPDK_DEBUGLOG(nvmf_tcp, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); 2304 2305 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; 2306 if (!tqpair->host_hdgst_enable) { 2307 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 2308 } 2309 2310 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; 2311 if (!tqpair->host_ddgst_enable) { 2312 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 2313 } 2314 2315 tqpair->recv_buf_size = spdk_max(tqpair->recv_buf_size, MIN_SOCK_PIPE_SIZE); 2316 /* Now that we know whether digests are enabled, properly size the receive buffer */ 2317 if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) { 2318 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 2319 tqpair, 2320 tqpair->recv_buf_size); 2321 /* Not fatal. */ 2322 } 2323 2324 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); 2325 SPDK_DEBUGLOG(nvmf_tcp, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); 2326 2327 rsp_pdu = tqpair->mgmt_pdu; 2328 2329 ic_resp = &rsp_pdu->hdr.ic_resp; 2330 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; 2331 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); 2332 ic_resp->pfv = 0; 2333 ic_resp->cpda = tqpair->cpda; 2334 ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size; 2335 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; 2336 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 
1 : 0; 2337 2338 SPDK_DEBUGLOG(nvmf_tcp, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); 2339 SPDK_DEBUGLOG(nvmf_tcp, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); 2340 2341 nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_INITIALIZING); 2342 nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_icresp_complete, tqpair); 2343 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2344 return; 2345 end: 2346 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2347 } 2348 2349 static void 2350 nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair, 2351 struct spdk_nvmf_tcp_transport *ttransport) 2352 { 2353 struct nvme_tcp_pdu *pdu; 2354 int rc; 2355 uint32_t crc32c, error_offset = 0; 2356 enum spdk_nvme_tcp_term_req_fes fes; 2357 2358 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 2359 pdu = tqpair->pdu_in_progress; 2360 2361 SPDK_DEBUGLOG(nvmf_tcp, "pdu type of tqpair(%p) is %d\n", tqpair, 2362 pdu->hdr.common.pdu_type); 2363 /* check header digest if needed */ 2364 if (pdu->has_hdgst) { 2365 SPDK_DEBUGLOG(nvmf_tcp, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); 2366 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 2367 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 2368 if (rc == 0) { 2369 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 2370 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 2371 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2372 return; 2373 2374 } 2375 } 2376 2377 switch (pdu->hdr.common.pdu_type) { 2378 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: 2379 nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); 2380 break; 2381 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 2382 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ); 2383 break; 2384 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 2385 nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); 2386 break; 2387 2388 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 2389 nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); 2390 break; 2391 2392 default: 2393 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress->hdr.common.pdu_type); 2394 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2395 error_offset = 1; 2396 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2397 break; 2398 } 2399 } 2400 2401 static void 2402 nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) 2403 { 2404 struct nvme_tcp_pdu *pdu; 2405 uint32_t error_offset = 0; 2406 enum spdk_nvme_tcp_term_req_fes fes; 2407 uint8_t expected_hlen, pdo; 2408 bool plen_error = false, pdo_error = false; 2409 2410 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 2411 pdu = tqpair->pdu_in_progress; 2412 assert(pdu); 2413 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { 2414 if (tqpair->state != NVMF_TCP_QPAIR_STATE_INVALID) { 2415 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); 2416 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 2417 goto err; 2418 } 2419 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); 2420 if (pdu->hdr.common.plen != expected_hlen) { 2421 plen_error = true; 2422 } 2423 } else { 2424 if (tqpair->state != NVMF_TCP_QPAIR_STATE_RUNNING) { 2425 SPDK_ERRLOG("The TCP/IP connection is not negotiated\n"); 2426 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 2427 goto err; 2428 } 2429 2430 switch (pdu->hdr.common.pdu_type) { 2431 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 2432 expected_hlen = sizeof(struct 
spdk_nvme_tcp_cmd); 2433 pdo = pdu->hdr.common.pdo; 2434 if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) { 2435 pdo_error = true; 2436 break; 2437 } 2438 2439 if (pdu->hdr.common.plen < expected_hlen) { 2440 plen_error = true; 2441 } 2442 break; 2443 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 2444 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); 2445 pdo = pdu->hdr.common.pdo; 2446 if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) { 2447 pdo_error = true; 2448 break; 2449 } 2450 if (pdu->hdr.common.plen < expected_hlen) { 2451 plen_error = true; 2452 } 2453 break; 2454 2455 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 2456 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 2457 if ((pdu->hdr.common.plen <= expected_hlen) || 2458 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 2459 plen_error = true; 2460 } 2461 break; 2462 2463 default: 2464 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); 2465 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2466 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 2467 goto err; 2468 } 2469 } 2470 2471 if (pdu->hdr.common.hlen != expected_hlen) { 2472 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", 2473 pdu->hdr.common.pdu_type, 2474 expected_hlen, pdu->hdr.common.hlen, tqpair); 2475 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2476 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 2477 goto err; 2478 } else if (pdo_error) { 2479 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2480 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 2481 } else if (plen_error) { 2482 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2483 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 2484 goto err; 2485 } else { 2486 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 2487 nvme_tcp_pdu_calc_psh_len(tqpair->pdu_in_progress, tqpair->host_hdgst_enable); 2488 return; 2489 } 2490 err: 2491 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2492 } 2493 2494 static int 2495 nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) 2496 { 2497 int rc = 0; 2498 struct nvme_tcp_pdu *pdu; 2499 enum nvme_tcp_pdu_recv_state prev_state; 2500 uint32_t data_len; 2501 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 2502 struct spdk_nvmf_tcp_transport, transport); 2503 2504 /* The loop here is to allow for several back-to-back state changes. 
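	 * Each pass of the loop handles exactly one recv state; when a handler
	 * advances recv_state another pass runs immediately, and the loop exits
	 * once a pass leaves the state unchanged (typically because more socket
	 * data is required).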
*/ 2505 do { 2506 prev_state = tqpair->recv_state; 2507 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); 2508 2509 pdu = tqpair->pdu_in_progress; 2510 assert(pdu != NULL || 2511 tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY || 2512 tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_QUIESCING || 2513 tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 2514 2515 switch (tqpair->recv_state) { 2516 /* Wait for the common header */ 2517 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 2518 if (!pdu) { 2519 pdu = SLIST_FIRST(&tqpair->tcp_pdu_free_queue); 2520 if (spdk_unlikely(!pdu)) { 2521 return NVME_TCP_PDU_IN_PROGRESS; 2522 } 2523 SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist); 2524 tqpair->pdu_in_progress = pdu; 2525 tqpair->tcp_pdu_working_count++; 2526 } 2527 memset(pdu, 0, offsetof(struct nvme_tcp_pdu, qpair)); 2528 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 2529 /* FALLTHROUGH */ 2530 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 2531 if (spdk_unlikely(tqpair->state == NVMF_TCP_QPAIR_STATE_INITIALIZING)) { 2532 return rc; 2533 } 2534 2535 rc = nvme_tcp_read_data(tqpair->sock, 2536 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 2537 (void *)&pdu->hdr.common + pdu->ch_valid_bytes); 2538 if (rc < 0) { 2539 SPDK_DEBUGLOG(nvmf_tcp, "will disconnect tqpair=%p\n", tqpair); 2540 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2541 break; 2542 } else if (rc > 0) { 2543 pdu->ch_valid_bytes += rc; 2544 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.trace_id, rc, 0); 2545 } 2546 2547 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 2548 return NVME_TCP_PDU_IN_PROGRESS; 2549 } 2550 2551 /* The command header of this PDU has now been read from the socket. */ 2552 nvmf_tcp_pdu_ch_handle(tqpair); 2553 break; 2554 /* Wait for the pdu specific header */ 2555 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 2556 rc = nvme_tcp_read_data(tqpair->sock, 2557 pdu->psh_len - pdu->psh_valid_bytes, 2558 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 2559 if (rc < 0) { 2560 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2561 break; 2562 } else if (rc > 0) { 2563 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.trace_id, rc, 0); 2564 pdu->psh_valid_bytes += rc; 2565 } 2566 2567 if (pdu->psh_valid_bytes < pdu->psh_len) { 2568 return NVME_TCP_PDU_IN_PROGRESS; 2569 } 2570 2571 /* All header(ch, psh, head digits) of this PDU has now been read from the socket. 
*/ 2572 nvmf_tcp_pdu_psh_handle(tqpair, ttransport); 2573 break; 2574 /* Wait for the req slot */ 2575 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: 2576 nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); 2577 break; 2578 /* Wait for the request processing loop to acquire a buffer for the PDU */ 2579 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF: 2580 break; 2581 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 2582 /* check whether the data is valid, if not we just return */ 2583 if (!pdu->data_len) { 2584 return NVME_TCP_PDU_IN_PROGRESS; 2585 } 2586 2587 data_len = pdu->data_len; 2588 /* data digest */ 2589 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && 2590 tqpair->host_ddgst_enable)) { 2591 data_len += SPDK_NVME_TCP_DIGEST_LEN; 2592 pdu->ddgst_enable = true; 2593 } 2594 2595 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 2596 if (rc < 0) { 2597 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2598 break; 2599 } 2600 pdu->rw_offset += rc; 2601 2602 if (pdu->rw_offset < data_len) { 2603 return NVME_TCP_PDU_IN_PROGRESS; 2604 } 2605 2606 /* Generate and insert DIF to whole data block received if DIF is enabled */ 2607 if (spdk_unlikely(pdu->dif_ctx != NULL) && 2608 spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 0, data_len, 2609 pdu->dif_ctx) != 0) { 2610 SPDK_ERRLOG("DIF generate failed\n"); 2611 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2612 break; 2613 } 2614 2615 /* All of this PDU has now been read from the socket. */ 2616 nvmf_tcp_pdu_payload_handle(tqpair, pdu); 2617 break; 2618 case NVME_TCP_PDU_RECV_STATE_QUIESCING: 2619 if (tqpair->tcp_pdu_working_count != 0) { 2620 return NVME_TCP_PDU_IN_PROGRESS; 2621 } 2622 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 2623 break; 2624 case NVME_TCP_PDU_RECV_STATE_ERROR: 2625 if (spdk_sock_is_connected(tqpair->sock) && tqpair->wait_terminate) { 2626 return NVME_TCP_PDU_IN_PROGRESS; 2627 } 2628 return NVME_TCP_PDU_FATAL; 2629 default: 2630 SPDK_ERRLOG("The state(%d) is invalid\n", tqpair->recv_state); 2631 abort(); 2632 break; 2633 } 2634 } while (tqpair->recv_state != prev_state); 2635 2636 return rc; 2637 } 2638 2639 static inline void * 2640 nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list, 2641 struct spdk_nvmf_tcp_req *tcp_req) 2642 { 2643 struct spdk_nvmf_tcp_control_msg *msg; 2644 2645 assert(list); 2646 2647 msg = STAILQ_FIRST(&list->free_msgs); 2648 if (!msg) { 2649 SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n"); 2650 STAILQ_INSERT_TAIL(&list->waiting_for_msg_reqs, tcp_req, control_msg_link); 2651 return NULL; 2652 } 2653 STAILQ_REMOVE_HEAD(&list->free_msgs, link); 2654 return msg; 2655 } 2656 2657 static inline void 2658 nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg) 2659 { 2660 struct spdk_nvmf_tcp_control_msg *msg = _msg; 2661 struct spdk_nvmf_tcp_req *tcp_req; 2662 struct spdk_nvmf_tcp_transport *ttransport; 2663 2664 assert(list); 2665 STAILQ_INSERT_HEAD(&list->free_msgs, msg, link); 2666 if (!STAILQ_EMPTY(&list->waiting_for_msg_reqs)) { 2667 tcp_req = STAILQ_FIRST(&list->waiting_for_msg_reqs); 2668 STAILQ_REMOVE_HEAD(&list->waiting_for_msg_reqs, control_msg_link); 2669 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 2670 struct spdk_nvmf_tcp_transport, transport); 2671 nvmf_tcp_req_process(ttransport, tcp_req); 2672 } 2673 } 2674 2675 static void 2676 nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, 2677 struct 
spdk_nvmf_transport *transport, 2678 struct spdk_nvmf_transport_poll_group *group) 2679 { 2680 struct spdk_nvmf_request *req = &tcp_req->req; 2681 struct spdk_nvme_cmd *cmd; 2682 struct spdk_nvme_sgl_descriptor *sgl; 2683 struct spdk_nvmf_tcp_poll_group *tgroup; 2684 enum spdk_nvme_tcp_term_req_fes fes; 2685 struct nvme_tcp_pdu *pdu; 2686 struct spdk_nvmf_tcp_qpair *tqpair; 2687 uint32_t length, error_offset = 0; 2688 2689 cmd = &req->cmd->nvme_cmd; 2690 sgl = &cmd->dptr.sgl1; 2691 2692 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && 2693 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { 2694 /* get request length from sgl */ 2695 length = sgl->unkeyed.length; 2696 if (spdk_unlikely(length > transport->opts.max_io_size)) { 2697 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 2698 length, transport->opts.max_io_size); 2699 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED; 2700 goto fatal_err; 2701 } 2702 2703 /* fill request length and populate iovs */ 2704 req->length = length; 2705 2706 SPDK_DEBUGLOG(nvmf_tcp, "Data requested length= 0x%x\n", length); 2707 2708 if (spdk_unlikely(req->dif_enabled)) { 2709 req->dif.orig_length = length; 2710 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2711 req->dif.elba_length = length; 2712 } 2713 2714 if (nvmf_ctrlr_use_zcopy(req)) { 2715 SPDK_DEBUGLOG(nvmf_tcp, "Using zero-copy to execute request %p\n", tcp_req); 2716 req->data_from_pool = false; 2717 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER); 2718 return; 2719 } 2720 2721 if (spdk_nvmf_request_get_buffers(req, group, transport, length)) { 2722 /* No available buffers. Queue this request up. */ 2723 SPDK_DEBUGLOG(nvmf_tcp, "No available large data buffers. Queueing request %p\n", 2724 tcp_req); 2725 return; 2726 } 2727 2728 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER); 2729 SPDK_DEBUGLOG(nvmf_tcp, "Request %p took %d buffer/s from central pool, and data=%p\n", 2730 tcp_req, req->iovcnt, req->iov[0].iov_base); 2731 2732 return; 2733 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 2734 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 2735 uint64_t offset = sgl->address; 2736 uint32_t max_len = transport->opts.in_capsule_data_size; 2737 2738 assert(tcp_req->has_in_capsule_data); 2739 /* Capsule Cmd with In-capsule Data should get data length from pdu header */ 2740 tqpair = tcp_req->pdu->qpair; 2741 /* receiving pdu is not same with the pdu in tcp_req */ 2742 pdu = tqpair->pdu_in_progress; 2743 length = pdu->hdr.common.plen - pdu->psh_len - sizeof(struct spdk_nvme_tcp_common_pdu_hdr); 2744 if (tqpair->host_ddgst_enable) { 2745 length -= SPDK_NVME_TCP_DIGEST_LEN; 2746 } 2747 /* This error is not defined in NVMe/TCP spec, take this error as fatal error */ 2748 if (spdk_unlikely(length != sgl->unkeyed.length)) { 2749 SPDK_ERRLOG("In-Capsule Data length 0x%x is not equal to SGL data length 0x%x\n", 2750 length, sgl->unkeyed.length); 2751 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2752 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 2753 goto fatal_err; 2754 } 2755 2756 SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 2757 offset, length); 2758 2759 /* The NVMe/TCP transport does not use ICDOFF to control the in-capsule data offset. 
ICDOFF should be '0' */ 2760 if (spdk_unlikely(offset != 0)) { 2761 /* Not defined fatal error in NVMe/TCP spec, handle this error as a fatal error */ 2762 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " should be ZERO in NVMe/TCP\n", offset); 2763 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 2764 error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.address); 2765 goto fatal_err; 2766 } 2767 2768 if (spdk_unlikely(length > max_len)) { 2769 /* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */ 2770 if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE && 2771 (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) { 2772 2773 /* Get a buffer from dedicated list */ 2774 SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n"); 2775 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2776 assert(tgroup->control_msg_list); 2777 req->iov[0].iov_base = nvmf_tcp_control_msg_get(tgroup->control_msg_list, tcp_req); 2778 if (!req->iov[0].iov_base) { 2779 /* No available buffers. Queue this request up. */ 2780 SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req); 2781 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF); 2782 return; 2783 } 2784 } else { 2785 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 2786 length, max_len); 2787 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED; 2788 goto fatal_err; 2789 } 2790 } else { 2791 req->iov[0].iov_base = tcp_req->buf; 2792 } 2793 2794 req->length = length; 2795 req->data_from_pool = false; 2796 2797 if (spdk_unlikely(req->dif_enabled)) { 2798 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2799 req->dif.elba_length = length; 2800 } 2801 2802 req->iov[0].iov_len = length; 2803 req->iovcnt = 1; 2804 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_HAVE_BUFFER); 2805 2806 return; 2807 } 2808 /* If we want to handle the problem here, then we can't skip the following data segment. 2809 * Because this function runs before reading data part, now handle all errors as fatal errors. 
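	 * A non-fatal completion would require reading and discarding the data
	 * segment that follows this header; since the payload has not been read
	 * yet at this point, terminating the connection with a C2H term req is
	 * the safer choice.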
*/ 2810 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 2811 sgl->generic.type, sgl->generic.subtype); 2812 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 2813 error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.generic); 2814 fatal_err: 2815 nvmf_tcp_send_c2h_term_req(tcp_req->pdu->qpair, tcp_req->pdu, fes, error_offset); 2816 } 2817 2818 static inline enum spdk_nvme_media_error_status_code 2819 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) { 2820 enum spdk_nvme_media_error_status_code result; 2821 2822 switch (err_type) 2823 { 2824 case SPDK_DIF_REFTAG_ERROR: 2825 result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR; 2826 break; 2827 case SPDK_DIF_APPTAG_ERROR: 2828 result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR; 2829 break; 2830 case SPDK_DIF_GUARD_ERROR: 2831 result = SPDK_NVME_SC_GUARD_CHECK_ERROR; 2832 break; 2833 default: 2834 SPDK_UNREACHABLE(); 2835 break; 2836 } 2837 2838 return result; 2839 } 2840 2841 static void 2842 _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2843 struct spdk_nvmf_tcp_req *tcp_req) 2844 { 2845 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF( 2846 tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 2847 struct nvme_tcp_pdu *rsp_pdu; 2848 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 2849 uint32_t plen, pdo, alignment; 2850 int rc; 2851 2852 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2853 2854 rsp_pdu = tcp_req->pdu; 2855 assert(rsp_pdu != NULL); 2856 2857 c2h_data = &rsp_pdu->hdr.c2h_data; 2858 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; 2859 plen = c2h_data->common.hlen = sizeof(*c2h_data); 2860 2861 if (tqpair->host_hdgst_enable) { 2862 plen += SPDK_NVME_TCP_DIGEST_LEN; 2863 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2864 } 2865 2866 /* set the psh */ 2867 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2868 c2h_data->datal = tcp_req->req.length - tcp_req->pdu->rw_offset; 2869 c2h_data->datao = tcp_req->pdu->rw_offset; 2870 2871 /* set the padding */ 2872 rsp_pdu->padding_len = 0; 2873 pdo = plen; 2874 if (tqpair->cpda) { 2875 alignment = (tqpair->cpda + 1) << 2; 2876 if (plen % alignment != 0) { 2877 pdo = (plen + alignment) / alignment * alignment; 2878 rsp_pdu->padding_len = pdo - plen; 2879 plen = pdo; 2880 } 2881 } 2882 2883 c2h_data->common.pdo = pdo; 2884 plen += c2h_data->datal; 2885 if (tqpair->host_ddgst_enable) { 2886 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 2887 plen += SPDK_NVME_TCP_DIGEST_LEN; 2888 } 2889 2890 c2h_data->common.plen = plen; 2891 2892 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2893 rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 2894 } 2895 2896 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2897 c2h_data->datao, c2h_data->datal); 2898 2899 2900 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 2901 /* Need to send the capsule response if response is not all 0 */ 2902 if (ttransport->tcp_opts.c2h_success && 2903 tcp_req->rsp.cdw0 == 0 && tcp_req->rsp.cdw1 == 0) { 2904 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 2905 } 2906 2907 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2908 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; 2909 struct spdk_dif_error err_blk = {}; 2910 uint32_t mapped_length = 0; 2911 uint32_t available_iovs = SPDK_COUNTOF(rsp_pdu->iov); 2912 uint32_t ddgst_len = 0; 2913 2914 if (tqpair->host_ddgst_enable) { 2915 /* Data digest consumes additional iov entry */ 2916 available_iovs--; 2917 /* 
plen needs to be updated since nvme_tcp_build_iovs compares expected and actual plen */ 2918 ddgst_len = SPDK_NVME_TCP_DIGEST_LEN; 2919 c2h_data->common.plen -= ddgst_len; 2920 } 2921 /* Temp call to estimate if data can be described by limited number of iovs. 2922 * iov vector will be rebuilt in nvmf_tcp_qpair_write_pdu */ 2923 nvme_tcp_build_iovs(rsp_pdu->iov, available_iovs, rsp_pdu, tqpair->host_hdgst_enable, 2924 false, &mapped_length); 2925 2926 if (mapped_length != c2h_data->common.plen) { 2927 c2h_data->datal = mapped_length - (c2h_data->common.plen - c2h_data->datal); 2928 SPDK_DEBUGLOG(nvmf_tcp, 2929 "Part C2H, data_len %u (of %u), PDU len %u, updated PDU len %u, offset %u\n", 2930 c2h_data->datal, tcp_req->req.length, c2h_data->common.plen, mapped_length, rsp_pdu->rw_offset); 2931 c2h_data->common.plen = mapped_length; 2932 2933 /* Rebuild pdu->data_iov since data length is changed */ 2934 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, c2h_data->datao, 2935 c2h_data->datal); 2936 2937 c2h_data->common.flags &= ~(SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU | 2938 SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS); 2939 } 2940 2941 c2h_data->common.plen += ddgst_len; 2942 2943 assert(rsp_pdu->rw_offset <= tcp_req->req.length); 2944 2945 rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt, 2946 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk); 2947 if (rc != 0) { 2948 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2949 err_blk.err_type, err_blk.err_offset); 2950 rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR; 2951 rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type); 2952 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2953 return; 2954 } 2955 } 2956 2957 rsp_pdu->rw_offset += c2h_data->datal; 2958 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_pdu_c2h_data_complete, tcp_req); 2959 } 2960 2961 static void 2962 nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2963 struct spdk_nvmf_tcp_req *tcp_req) 2964 { 2965 nvmf_tcp_req_pdu_init(tcp_req); 2966 _nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2967 } 2968 2969 static int 2970 request_transfer_out(struct spdk_nvmf_request *req) 2971 { 2972 struct spdk_nvmf_tcp_req *tcp_req; 2973 struct spdk_nvmf_qpair *qpair; 2974 struct spdk_nvmf_tcp_qpair *tqpair; 2975 struct spdk_nvme_cpl *rsp; 2976 2977 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2978 2979 qpair = req->qpair; 2980 rsp = &req->rsp->nvme_cpl; 2981 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2982 2983 /* Advance our sq_head pointer */ 2984 if (qpair->sq_head == qpair->sq_head_max) { 2985 qpair->sq_head = 0; 2986 } else { 2987 qpair->sq_head++; 2988 } 2989 rsp->sqhd = qpair->sq_head; 2990 2991 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2992 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 2993 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 2994 nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2995 } else { 2996 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2997 } 2998 2999 return 0; 3000 } 3001 3002 static void 3003 nvmf_tcp_check_fused_ordering(struct spdk_nvmf_tcp_transport *ttransport, 3004 struct spdk_nvmf_tcp_qpair *tqpair, 3005 struct spdk_nvmf_tcp_req *tcp_req) 3006 { 3007 enum spdk_nvme_cmd_fuse last, next; 3008 3009 last = tqpair->fused_first ? 
tqpair->fused_first->cmd.fuse : SPDK_NVME_CMD_FUSE_NONE; 3010 next = tcp_req->cmd.fuse; 3011 3012 assert(last != SPDK_NVME_CMD_FUSE_SECOND); 3013 3014 if (spdk_likely(last == SPDK_NVME_CMD_FUSE_NONE && next == SPDK_NVME_CMD_FUSE_NONE)) { 3015 return; 3016 } 3017 3018 if (last == SPDK_NVME_CMD_FUSE_FIRST) { 3019 if (next == SPDK_NVME_CMD_FUSE_SECOND) { 3020 /* This is a valid pair of fused commands. Point them at each other 3021 * so they can be submitted consecutively once ready to be executed. 3022 */ 3023 tqpair->fused_first->fused_pair = tcp_req; 3024 tcp_req->fused_pair = tqpair->fused_first; 3025 tqpair->fused_first = NULL; 3026 return; 3027 } else { 3028 /* Mark the last req as failed since it wasn't followed by a SECOND. */ 3029 tqpair->fused_first->fused_failed = true; 3030 3031 /* 3032 * If the last req is in READY_TO_EXECUTE state, then call 3033 * nvmf_tcp_req_process(), otherwise nothing else will kick it. 3034 */ 3035 if (tqpair->fused_first->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) { 3036 nvmf_tcp_req_process(ttransport, tqpair->fused_first); 3037 } 3038 3039 tqpair->fused_first = NULL; 3040 } 3041 } 3042 3043 if (next == SPDK_NVME_CMD_FUSE_FIRST) { 3044 /* Set tqpair->fused_first here so that we know to check that the next request 3045 * is a SECOND (and to fail this one if it isn't). 3046 */ 3047 tqpair->fused_first = tcp_req; 3048 } else if (next == SPDK_NVME_CMD_FUSE_SECOND) { 3049 /* Mark this req failed since it is a SECOND and the last one was not a FIRST. */ 3050 tcp_req->fused_failed = true; 3051 } 3052 } 3053 3054 static bool 3055 nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 3056 struct spdk_nvmf_tcp_req *tcp_req) 3057 { 3058 struct spdk_nvmf_tcp_qpair *tqpair; 3059 uint32_t plen; 3060 struct nvme_tcp_pdu *pdu; 3061 enum spdk_nvmf_tcp_req_state prev_state; 3062 bool progress = false; 3063 struct spdk_nvmf_transport *transport = &ttransport->transport; 3064 struct spdk_nvmf_transport_poll_group *group; 3065 struct spdk_nvmf_tcp_poll_group *tgroup; 3066 3067 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 3068 group = &tqpair->group->group; 3069 assert(tcp_req->state != TCP_REQUEST_STATE_FREE); 3070 3071 /* If the qpair is not active, we need to abort the outstanding requests. */ 3072 if (!spdk_nvmf_qpair_is_active(&tqpair->qpair)) { 3073 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { 3074 nvmf_tcp_request_get_buffers_abort(tcp_req); 3075 } 3076 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 3077 } 3078 3079 /* The loop here is to allow for several back-to-back state changes. */ 3080 do { 3081 prev_state = tcp_req->state; 3082 3083 SPDK_DEBUGLOG(nvmf_tcp, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, 3084 tqpair); 3085 3086 switch (tcp_req->state) { 3087 case TCP_REQUEST_STATE_FREE: 3088 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW 3089 * to escape this state. 
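		 * For this transport that happens in nvmf_tcp_capsule_cmd_hdr_handle(),
		 * which takes a free request via nvmf_tcp_req_get() and runs it through
		 * nvmf_tcp_req_process() in the NEW state.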
*/ 3090 break; 3091 case TCP_REQUEST_STATE_NEW: 3092 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req, 3093 tqpair->qpair.queue_depth); 3094 3095 /* copy the cmd from the receive pdu */ 3096 tcp_req->cmd = tqpair->pdu_in_progress->hdr.capsule_cmd.ccsqe; 3097 3098 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) { 3099 tcp_req->req.dif_enabled = true; 3100 tqpair->pdu_in_progress->dif_ctx = &tcp_req->req.dif.dif_ctx; 3101 } 3102 3103 nvmf_tcp_check_fused_ordering(ttransport, tqpair, tcp_req); 3104 3105 /* The next state transition depends on the data transfer needs of this request. */ 3106 tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req); 3107 3108 if (spdk_unlikely(tcp_req->req.xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) { 3109 nvmf_tcp_req_set_cpl(tcp_req, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_INVALID_OPCODE); 3110 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 3111 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 3112 SPDK_DEBUGLOG(nvmf_tcp, "Request %p: invalid xfer type (BIDIRECTIONAL)\n", tcp_req); 3113 break; 3114 } 3115 3116 /* If no data to transfer, ready to execute. */ 3117 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { 3118 /* Reset the tqpair receiving pdu state */ 3119 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 3120 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 3121 break; 3122 } 3123 3124 pdu = tqpair->pdu_in_progress; 3125 plen = pdu->hdr.common.hlen; 3126 if (tqpair->host_hdgst_enable) { 3127 plen += SPDK_NVME_TCP_DIGEST_LEN; 3128 } 3129 if (pdu->hdr.common.plen != plen) { 3130 tcp_req->has_in_capsule_data = true; 3131 } else { 3132 /* Data is transmitted by C2H PDUs */ 3133 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 3134 } 3135 3136 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); 3137 break; 3138 case TCP_REQUEST_STATE_NEED_BUFFER: 3139 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, tqpair->qpair.trace_id, 0, 3140 (uintptr_t)tcp_req); 3141 3142 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); 3143 3144 /* Try to get a data buffer */ 3145 nvmf_tcp_req_parse_sgl(tcp_req, transport, group); 3146 break; 3147 case TCP_REQUEST_STATE_HAVE_BUFFER: 3148 spdk_trace_record(TRACE_TCP_REQUEST_STATE_HAVE_BUFFER, tqpair->qpair.trace_id, 0, 3149 (uintptr_t)tcp_req); 3150 /* Get a zcopy buffer if the request can be serviced through zcopy */ 3151 if (spdk_nvmf_request_using_zcopy(&tcp_req->req)) { 3152 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 3153 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 3154 tcp_req->req.length = tcp_req->req.dif.elba_length; 3155 } 3156 3157 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_START); 3158 spdk_nvmf_request_zcopy_start(&tcp_req->req); 3159 break; 3160 } 3161 3162 assert(tcp_req->req.iovcnt > 0); 3163 3164 /* If data is transferring from host to controller, we need to do a transfer from the host. 
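		 * Two cases follow: data placed in a pool buffer requires an explicit
		 * R2T so the host may start sending H2C DATA PDUs, while in-capsule
		 * data is already part of the current PDU and only needs the payload
		 * read redirected into the request iovecs.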
*/ 3165 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 3166 if (tcp_req->req.data_from_pool) { 3167 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 3168 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 3169 } else { 3170 struct nvme_tcp_pdu *pdu; 3171 3172 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 3173 3174 pdu = tqpair->pdu_in_progress; 3175 SPDK_DEBUGLOG(nvmf_tcp, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, 3176 tqpair); 3177 /* No need to send r2t, contained in the capsuled data */ 3178 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 3179 0, tcp_req->req.length); 3180 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 3181 } 3182 break; 3183 } 3184 3185 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 3186 break; 3187 case TCP_REQUEST_STATE_AWAITING_ZCOPY_START: 3188 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START, tqpair->qpair.trace_id, 0, 3189 (uintptr_t)tcp_req); 3190 /* Some external code must kick a request into TCP_REQUEST_STATE_ZCOPY_START_COMPLETED 3191 * to escape this state. */ 3192 break; 3193 case TCP_REQUEST_STATE_ZCOPY_START_COMPLETED: 3194 spdk_trace_record(TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED, tqpair->qpair.trace_id, 0, 3195 (uintptr_t)tcp_req); 3196 if (spdk_unlikely(spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl))) { 3197 SPDK_DEBUGLOG(nvmf_tcp, "Zero-copy start failed for tcp_req(%p) on tqpair=%p\n", 3198 tcp_req, tqpair); 3199 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 3200 break; 3201 } 3202 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 3203 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 3204 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 3205 } else { 3206 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 3207 } 3208 break; 3209 case TCP_REQUEST_STATE_AWAITING_R2T_ACK: 3210 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, tqpair->qpair.trace_id, 0, 3211 (uintptr_t)tcp_req); 3212 /* The R2T completion or the h2c data incoming will kick it out of this state. */ 3213 break; 3214 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 3215 3216 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, tqpair->qpair.trace_id, 3217 0, (uintptr_t)tcp_req); 3218 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE 3219 * to escape this state. */ 3220 break; 3221 case TCP_REQUEST_STATE_READY_TO_EXECUTE: 3222 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, tqpair->qpair.trace_id, 0, 3223 (uintptr_t)tcp_req); 3224 3225 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 3226 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 3227 tcp_req->req.length = tcp_req->req.dif.elba_length; 3228 } 3229 3230 if (tcp_req->cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) { 3231 if (tcp_req->fused_failed) { 3232 /* This request failed FUSED semantics. Fail it immediately, without 3233 * even sending it to the target layer. 3234 */ 3235 nvmf_tcp_req_set_cpl(tcp_req, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_MISSING_FUSED); 3236 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 3237 break; 3238 } 3239 3240 if (tcp_req->fused_pair == NULL || 3241 tcp_req->fused_pair->state != TCP_REQUEST_STATE_READY_TO_EXECUTE) { 3242 /* This request is ready to execute, but either we don't know yet if it's 3243 * valid - i.e. 
					 * or the other request of this fused pair isn't ready to execute. So
					 * break here and this request will get processed later either when the
					 * other request is ready or we find that this request isn't valid.
					 */
					break;
				}
			}

			if (!spdk_nvmf_request_using_zcopy(&tcp_req->req)) {
				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
				/* If we get to this point, and this request is a fused command, we know that
				 * it is part of a valid sequence (FIRST followed by a SECOND) and that both
				 * requests are READY_TO_EXECUTE. So call spdk_nvmf_request_exec() both on this
				 * request, and the other request of the fused pair, in the correct order.
				 * Also clear the ->fused_pair pointers on both requests, since after this point
				 * we no longer need to maintain the relationship between these two requests.
				 */
				if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_SECOND) {
					assert(tcp_req->fused_pair != NULL);
					assert(tcp_req->fused_pair->fused_pair == tcp_req);
					nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING);
					spdk_nvmf_request_exec(&tcp_req->fused_pair->req);
					tcp_req->fused_pair->fused_pair = NULL;
					tcp_req->fused_pair = NULL;
				}
				spdk_nvmf_request_exec(&tcp_req->req);
				if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_FIRST) {
					assert(tcp_req->fused_pair != NULL);
					assert(tcp_req->fused_pair->fused_pair == tcp_req);
					nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING);
					spdk_nvmf_request_exec(&tcp_req->fused_pair->req);
					tcp_req->fused_pair->fused_pair = NULL;
					tcp_req->fused_pair = NULL;
				}
			} else {
				/* For zero-copy, only requests with data coming from host to the
				 * controller can end up here. */
				assert(tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT);
				spdk_nvmf_request_zcopy_end(&tcp_req->req, true);
			}

			break;
		case TCP_REQUEST_STATE_EXECUTING:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req);
			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
			 * to escape this state. */
			break;
		case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT, tqpair->qpair.trace_id, 0,
					  (uintptr_t)tcp_req);
			/* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
			 * to escape this state. */
			break;
		case TCP_REQUEST_STATE_EXECUTED:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req);

			if (spdk_unlikely(tcp_req->req.dif_enabled)) {
				tcp_req->req.length = tcp_req->req.dif.orig_length;
			}

			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
			break;
		case TCP_REQUEST_STATE_READY_TO_COMPLETE:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, tqpair->qpair.trace_id, 0,
					  (uintptr_t)tcp_req);
			if (request_transfer_out(&tcp_req->req) != 0) {
				assert(0); /* No good way to handle this currently */
			}
			break;
		case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, tqpair->qpair.trace_id,
					  0, (uintptr_t)tcp_req);
			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
			 * to escape this state. */
			break;
		case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE, tqpair->qpair.trace_id, 0,
					  (uintptr_t)tcp_req);
			/* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
			 * to escape this state. */
			break;
		case TCP_REQUEST_STATE_COMPLETED:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, tqpair->qpair.trace_id, 0, (uintptr_t)tcp_req,
					  tqpair->qpair.queue_depth);
			/* If there's an outstanding PDU sent to the host, the request is completed
			 * due to the qpair being disconnected. We must delay the completion until
			 * that write is done to avoid freeing the request twice. */
			if (spdk_unlikely(tcp_req->pdu_in_use)) {
				SPDK_DEBUGLOG(nvmf_tcp, "Delaying completion due to outstanding "
					      "write on req=%p\n", tcp_req);
				/* This can only happen for zcopy requests */
				assert(spdk_nvmf_request_using_zcopy(&tcp_req->req));
				assert(!spdk_nvmf_qpair_is_active(&tqpair->qpair));
				break;
			}

			if (tcp_req->req.data_from_pool) {
				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
			} else if (spdk_unlikely(tcp_req->has_in_capsule_data &&
						 (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC ||
						  tqpair->qpair.qid == 0) &&
						 tcp_req->req.length > transport->opts.in_capsule_data_size)) {
				tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
				assert(tgroup->control_msg_list);
				SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n");
				nvmf_tcp_control_msg_put(tgroup->control_msg_list,
							 tcp_req->req.iov[0].iov_base);
			} else if (tcp_req->req.zcopy_bdev_io != NULL) {
				/* If the request has an unreleased zcopy bdev_io, it's either a
				 * read, a failed write, or the qpair is being disconnected */
				assert(spdk_nvmf_request_using_zcopy(&tcp_req->req));
				assert(tcp_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
				       spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl) ||
				       !spdk_nvmf_qpair_is_active(&tqpair->qpair));
				nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE);
				spdk_nvmf_request_zcopy_end(&tcp_req->req, false);
				break;
			}
			tcp_req->req.length = 0;
			tcp_req->req.iovcnt = 0;
			tcp_req->fused_failed = false;
			if (tcp_req->fused_pair) {
				/* This req was part of a valid fused pair, but failed before it got to
				 * READY_TO_EXECUTE state. This means we need to fail the other request
				 * in the pair, because it is no longer part of a valid pair.
				 * If the pair already reached READY_TO_EXECUTE state, we need to kick it.
				 */
				tcp_req->fused_pair->fused_failed = true;
				if (tcp_req->fused_pair->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) {
					nvmf_tcp_req_process(ttransport, tcp_req->fused_pair);
				}
				tcp_req->fused_pair = NULL;
			}

			nvmf_tcp_req_put(tqpair, tcp_req);
			break;
		case TCP_REQUEST_NUM_STATES:
		default:
			assert(0);
			break;
		}

		if (tcp_req->state != prev_state) {
			progress = true;
		}
	} while (tcp_req->state != prev_state);

	return progress;
}

static void
tcp_sock_cb(void *arg)
{
	struct spdk_nvmf_tcp_qpair *tqpair = arg;
	int rc;

	assert(tqpair != NULL);
	rc = nvmf_tcp_sock_process(tqpair);

	/* If there was a new socket error, disconnect */
	if (rc < 0) {
		nvmf_tcp_qpair_disconnect(tqpair);
	}
}

static void
nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	tcp_sock_cb(arg);
}

static int
nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
			struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_tcp_poll_group *tgroup;
	struct spdk_nvmf_tcp_qpair *tqpair;
	int rc;

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	rc = nvmf_tcp_qpair_sock_init(tqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
		return -1;
	}

	rc = nvmf_tcp_qpair_init(&tqpair->qpair);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
		return -1;
	}

	rc = nvmf_tcp_qpair_init_mem_resource(tqpair);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
		return -1;
	}

	rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
				      nvmf_tcp_sock_cb, tqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
			    spdk_strerror(errno), errno);
		return -1;
	}

	tqpair->group = tgroup;
	nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_INVALID);
	TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);

	return 0;
}

static int
nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
			   struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_tcp_poll_group *tgroup;
	struct spdk_nvmf_tcp_qpair *tqpair;
	int rc;

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	assert(tqpair->group == tgroup);

	SPDK_DEBUGLOG(nvmf_tcp, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
	if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
		/* Change the state to move the qpair from the await_req list to the main list
		 * and prevent adding it again later by nvmf_tcp_qpair_set_recv_state() */
		nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
	}
	TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);

	/* Try to force out any pending writes */
	spdk_sock_flush(tqpair->sock);

	rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
	if (rc != 0) {
		SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
			    spdk_strerror(errno), errno);
	}

	return rc;
}
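
/*
 * Completion callback from the generic NVMe-oF layer. Depending on which
 * asynchronous step just finished (execution, zcopy start, or zcopy release),
 * advance the request to the corresponding next state and let
 * nvmf_tcp_req_process() drive the state machine forward.
 */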
static int
nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_req *tcp_req;
	struct spdk_nvmf_tcp_qpair *tqpair;

	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
	tqpair = SPDK_CONTAINEROF(req->qpair, struct spdk_nvmf_tcp_qpair, qpair);

	switch (tcp_req->state) {
	case TCP_REQUEST_STATE_EXECUTING:
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
		break;
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_ZCOPY_START_COMPLETED);
		break;
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE:
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
		/* In interrupt mode it's possible that all responses have already been written out
		 * over the socket while the zero-copy buffers are still not released. In that case
		 * there won't be any event to trigger further socket processing. Send a message to
		 * this thread to avoid a deadlock.
		 */
		if (spdk_unlikely(spdk_interrupt_mode_is_enabled() &&
				  tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ &&
				  spdk_nvmf_qpair_is_active(&tqpair->qpair))) {
			spdk_thread_send_msg(spdk_get_thread(), tcp_sock_cb, tqpair);
		}
		break;
	default:
		SPDK_ERRLOG("Unexpected request state %d (cntlid:%d, qid:%d)\n",
			    tcp_req->state, req->qpair->ctrlr->cntlid, req->qpair->qid);
		assert(0 && "Unexpected request state");
		break;
	}

	nvmf_tcp_req_process(ttransport, tcp_req);

	return 0;
}

static void
nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair,
		     spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
{
	struct spdk_nvmf_tcp_qpair *tqpair;

	SPDK_DEBUGLOG(nvmf_tcp, "Qpair: %p\n", qpair);

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	assert(tqpair->fini_cb_fn == NULL);
	tqpair->fini_cb_fn = cb_fn;
	tqpair->fini_cb_arg = cb_arg;

	nvmf_tcp_qpair_set_state(tqpair, NVMF_TCP_QPAIR_STATE_EXITED);
	nvmf_tcp_qpair_destroy(tqpair);
}

static int
nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct spdk_nvmf_tcp_poll_group *tgroup;
	int num_events, rc = 0, rc2;
	struct spdk_nvmf_tcp_qpair *tqpair, *tqpair_tmp;

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);

	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs) && TAILQ_EMPTY(&tgroup->await_req))) {
		return 0;
	}

	num_events = spdk_sock_group_poll(tgroup->sock_group);
	if (spdk_unlikely(num_events < 0)) {
		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
	}

	TAILQ_FOREACH_SAFE(tqpair, &tgroup->await_req, link, tqpair_tmp) {
		rc2 = nvmf_tcp_sock_process(tqpair);

		/* If there was a new socket error, disconnect */
		if (spdk_unlikely(rc2 < 0)) {
			nvmf_tcp_qpair_disconnect(tqpair);
			if (rc == 0) {
				rc = rc2;
			}
		}
	}

	return rc == 0 ? num_events : rc;
}
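
/*
 * Fill out a transport ID for this qpair: the initiator (peer) address and
 * port when 'peer' is true, otherwise the local target address and port.
 * The address family is taken from the underlying socket.
 */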
static int
nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
			struct spdk_nvme_transport_id *trid, bool peer)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	uint16_t port;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
	spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP);

	if (peer) {
		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
		port = tqpair->initiator_port;
	} else {
		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
		port = tqpair->target_port;
	}

	if (spdk_sock_is_ipv4(tqpair->sock)) {
		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else {
		return -1;
	}

	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
	return 0;
}

static int
nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
			      struct spdk_nvme_transport_id *trid)
{
	return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
}

static int
nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
			     struct spdk_nvme_transport_id *trid)
{
	return nvmf_tcp_qpair_get_trid(qpair, trid, 1);
}

static int
nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
			       struct spdk_nvme_transport_id *trid)
{
	return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
}

static void
nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req,
			      struct spdk_nvmf_tcp_req *tcp_req_to_abort)
{
	nvmf_tcp_req_set_cpl(tcp_req_to_abort, SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_BY_REQUEST);
	nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE);

	req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
}

static int
_nvmf_tcp_qpair_abort_request(void *ctx)
{
	struct spdk_nvmf_request *req = ctx;
	struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort,
			struct spdk_nvmf_tcp_req, req);
	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
					     struct spdk_nvmf_tcp_qpair, qpair);
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
			struct spdk_nvmf_tcp_transport, transport);
	int rc;

	spdk_poller_unregister(&req->poller);

	switch (tcp_req_to_abort->state) {
	case TCP_REQUEST_STATE_EXECUTING:
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
		rc = nvmf_ctrlr_abort_request(req);
		if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
			return SPDK_POLLER_BUSY;
		}
		break;

	case TCP_REQUEST_STATE_NEED_BUFFER:
		nvmf_tcp_request_get_buffers_abort(tcp_req_to_abort);
		nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
		nvmf_tcp_req_process(ttransport, tcp_req_to_abort);
		break;

	case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
	case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
		if (spdk_get_ticks() < req->timeout_tsc) {
			req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0);
			return SPDK_POLLER_BUSY;
		}
		break;

	default:
		/* Requests in other states are either un-abortable (e.g.
		 * TRANSFERRING_CONTROLLER_TO_HOST) or should never end up here, as they're
		 * immediately transitioned to other states in nvmf_tcp_req_process() (e.g.
		 * READY_TO_EXECUTE). But it is fine to end up here, as we'll simply complete the
		 * abort request with bit 0 of dword 0 set (command not aborted).
		 */
		break;
	}

	spdk_nvmf_request_complete(req);
	return SPDK_POLLER_BUSY;
}
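
/*
 * Handle an Abort command: find the outstanding request on this qpair whose
 * CID matches CDW10 of the Abort command and try to abort it, polling until
 * abort_timeout_sec expires for states that cannot be aborted right away.
 */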
static void
nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
			     struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_transport *transport;
	uint16_t cid;
	uint32_t i;
	struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
	ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);
	transport = &ttransport->transport;

	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;

	for (i = 0; i < tqpair->resource_count; i++) {
		if (tqpair->reqs[i].state != TCP_REQUEST_STATE_FREE &&
		    tqpair->reqs[i].req.cmd->nvme_cmd.cid == cid) {
			tcp_req_to_abort = &tqpair->reqs[i];
			break;
		}
	}

	spdk_trace_record(TRACE_TCP_QP_ABORT_REQ, tqpair->qpair.trace_id, 0, (uintptr_t)req);

	if (tcp_req_to_abort == NULL) {
		spdk_nvmf_request_complete(req);
		return;
	}

	req->req_to_abort = &tcp_req_to_abort->req;
	req->timeout_tsc = spdk_get_ticks() +
			   transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
	req->poller = NULL;

	_nvmf_tcp_qpair_abort_request(req);
}

struct tcp_subsystem_add_host_opts {
	char *psk;
};

static const struct spdk_json_object_decoder tcp_subsystem_add_host_opts_decoder[] = {
	{"psk", offsetof(struct tcp_subsystem_add_host_opts, psk), spdk_json_decode_string, true},
};

static int
nvmf_tcp_subsystem_add_host(struct spdk_nvmf_transport *transport,
			    const struct spdk_nvmf_subsystem *subsystem,
			    const char *hostnqn,
			    const struct spdk_json_val *transport_specific)
{
	struct tcp_subsystem_add_host_opts opts;
	struct spdk_nvmf_tcp_transport *ttransport;
	struct tcp_psk_entry *tmp, *entry = NULL;
	uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
	char psk_interchange[SPDK_TLS_PSK_MAX_LEN + 1] = {};
	uint8_t tls_cipher_suite;
	int rc = 0;
	uint8_t psk_retained_hash;
	uint64_t psk_configured_size;

	if (transport_specific == NULL) {
		return 0;
	}

	assert(transport != NULL);
	assert(subsystem != NULL);

	memset(&opts, 0, sizeof(opts));

	/* Decode the PSK (either the name of a key or a file path) */
	if (spdk_json_decode_object_relaxed(transport_specific, tcp_subsystem_add_host_opts_decoder,
					    SPDK_COUNTOF(tcp_subsystem_add_host_opts_decoder), &opts)) {
		SPDK_ERRLOG("spdk_json_decode_object failed\n");
		return -EINVAL;
	}

	if (opts.psk == NULL) {
		return 0;
	}

	entry = calloc(1, sizeof(struct tcp_psk_entry));
	if (entry == NULL) {
		SPDK_ERRLOG("Unable to allocate memory for PSK entry!\n");
		rc = -ENOMEM;
		goto end;
	}

	entry->key = spdk_keyring_get_key(opts.psk);
	if (entry->key == NULL) {
		SPDK_ERRLOG("Key '%s' does not exist\n", opts.psk);
		rc = -EINVAL;
		goto end;
	}

	rc = spdk_key_get_key(entry->key, psk_interchange, SPDK_TLS_PSK_MAX_LEN);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to retrieve PSK '%s'\n", opts.psk);
		rc = -EINVAL;
		goto end;
	}

	/* Parse the PSK interchange format to get the length of the Base64-encoded data.
	 * This is then used to decide which cipher suite should be used
	 * to generate the PSK identity and TLS PSK later on. */
	rc = nvme_tcp_parse_interchange_psk(psk_interchange, psk_configured, sizeof(psk_configured),
					    &psk_configured_size, &psk_retained_hash);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to parse PSK interchange!\n");
		goto end;
	}

	/* The Base64 string encodes the configured PSK (32 or 48 bytes of binary data).
	 * This check also ensures that psk_configured_size is smaller than
	 * the psk_retained buffer size. */
	if (psk_configured_size == SHA256_DIGEST_LENGTH) {
		tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
	} else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
		tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
	} else {
		SPDK_ERRLOG("Unrecognized cipher suite!\n");
		rc = -EINVAL;
		goto end;
	}

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	/* Generate the PSK identity. */
	rc = nvme_tcp_generate_psk_identity(entry->pskid, sizeof(entry->pskid), hostnqn,
					    subsystem->subnqn, tls_cipher_suite);
	if (rc) {
		rc = -EINVAL;
		goto end;
	}
	/* Check if a PSK identity entry already exists. */
	TAILQ_FOREACH(tmp, &ttransport->psks, link) {
		if (strncmp(tmp->pskid, entry->pskid, NVMF_PSK_IDENTITY_LEN) == 0) {
			SPDK_ERRLOG("Given PSK identity: %s entry already exists!\n", entry->pskid);
			rc = -EEXIST;
			goto end;
		}
	}

	if (snprintf(entry->hostnqn, sizeof(entry->hostnqn), "%s", hostnqn) < 0) {
		SPDK_ERRLOG("Could not write hostnqn string!\n");
		rc = -EINVAL;
		goto end;
	}
	if (snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn) < 0) {
		SPDK_ERRLOG("Could not write subnqn string!\n");
		rc = -EINVAL;
		goto end;
	}

	entry->tls_cipher_suite = tls_cipher_suite;

	/* No hash indicates that the Configured PSK must be used as the Retained PSK. */
	if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
		/* The configured PSK is either 32 or 48 bytes long. */
		memcpy(entry->psk, psk_configured, psk_configured_size);
		entry->psk_size = psk_configured_size;
	} else {
		/* Derive the retained PSK. */
		rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, hostnqn, entry->psk,
						  SPDK_TLS_PSK_MAX_LEN, psk_retained_hash);
		if (rc < 0) {
			SPDK_ERRLOG("Unable to derive retained PSK!\n");
			goto end;
		}
		entry->psk_size = rc;
	}

	TAILQ_INSERT_TAIL(&ttransport->psks, entry, link);
	rc = 0;

end:
	spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured));
	spdk_memset_s(psk_interchange, sizeof(psk_interchange), 0, sizeof(psk_interchange));

	free(opts.psk);
	if (rc != 0) {
		nvmf_tcp_free_psk_entry(entry);
	}

	return rc;
}

static void
nvmf_tcp_subsystem_remove_host(struct spdk_nvmf_transport *transport,
			       const struct spdk_nvmf_subsystem *subsystem,
			       const char *hostnqn)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct tcp_psk_entry *entry, *tmp;

	assert(transport != NULL);
	assert(subsystem != NULL);

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	TAILQ_FOREACH_SAFE(entry, &ttransport->psks, link, tmp) {
		if ((strncmp(entry->hostnqn, hostnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0 &&
		    (strncmp(entry->subnqn, subsystem->subnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0) {
			TAILQ_REMOVE(&ttransport->psks, entry, link);
			nvmf_tcp_free_psk_entry(entry);
			break;
		}
	}
}

static void
nvmf_tcp_subsystem_dump_host(struct spdk_nvmf_transport *transport,
			     const struct spdk_nvmf_subsystem *subsystem, const char *hostnqn,
			     struct spdk_json_write_ctx *w)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct tcp_psk_entry *entry;

	assert(transport != NULL);
	assert(subsystem != NULL);

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	TAILQ_FOREACH(entry, &ttransport->psks, link) {
		if ((strncmp(entry->hostnqn, hostnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0 &&
		    (strncmp(entry->subnqn, subsystem->subnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0) {
			spdk_json_write_named_string(w, "psk", spdk_key_get_name(entry->key));
			break;
		}
	}
}

static void
nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
	opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
	opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
	opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
	opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
	opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC;
	opts->transport_specific = NULL;
}
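
/*
 * Transport operations table registered with the generic NVMe-oF layer,
 * mapping each generic transport callback to its TCP implementation.
 */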
const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.opts_init = nvmf_tcp_opts_init,
	.create = nvmf_tcp_create,
	.dump_opts = nvmf_tcp_dump_opts,
	.destroy = nvmf_tcp_destroy,

	.listen = nvmf_tcp_listen,
	.stop_listen = nvmf_tcp_stop_listen,

	.listener_discover = nvmf_tcp_discover,

	.poll_group_create = nvmf_tcp_poll_group_create,
	.get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group,
	.poll_group_destroy = nvmf_tcp_poll_group_destroy,
	.poll_group_add = nvmf_tcp_poll_group_add,
	.poll_group_remove = nvmf_tcp_poll_group_remove,
	.poll_group_poll = nvmf_tcp_poll_group_poll,

	.req_free = nvmf_tcp_req_free,
	.req_complete = nvmf_tcp_req_complete,
	.req_get_buffers_done = nvmf_tcp_req_get_buffers_done,

	.qpair_fini = nvmf_tcp_close_qpair,
	.qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid,
	.qpair_abort_request = nvmf_tcp_qpair_abort_request,
	.subsystem_add_host = nvmf_tcp_subsystem_add_host,
	.subsystem_remove_host = nvmf_tcp_subsystem_remove_host,
	.subsystem_dump_host = nvmf_tcp_subsystem_dump_host,
};

SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp);
SPDK_LOG_REGISTER_COMPONENT(nvmf_tcp)