1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/accel.h" 8 #include "spdk/stdinc.h" 9 #include "spdk/crc32.h" 10 #include "spdk/endian.h" 11 #include "spdk/assert.h" 12 #include "spdk/thread.h" 13 #include "spdk/nvmf_transport.h" 14 #include "spdk/string.h" 15 #include "spdk/trace.h" 16 #include "spdk/util.h" 17 #include "spdk/log.h" 18 19 #include "spdk_internal/assert.h" 20 #include "spdk_internal/nvme_tcp.h" 21 #include "spdk_internal/sock.h" 22 23 #include "nvmf_internal.h" 24 25 #include "spdk_internal/trace_defs.h" 26 27 #define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16 28 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 16 29 #define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0 30 #define SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM 32 31 #define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true 32 33 #define SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH 2 34 #define SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH 65535 35 #define SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH 2 36 #define SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH 4096 37 38 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH 128 39 #define SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH 128 40 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128 41 #define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 42 #define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072 43 #define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072 44 #define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511 45 #define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE UINT32_MAX 46 #define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false 47 #define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1 48 49 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp; 50 51 /* spdk nvmf related structure */ 52 enum spdk_nvmf_tcp_req_state { 53 54 /* The request is not currently in use */ 55 TCP_REQUEST_STATE_FREE = 0, 56 57 /* Initial state when request first received */ 58 TCP_REQUEST_STATE_NEW = 1, 59 60 /* The request is queued until a data buffer is available. */ 61 TCP_REQUEST_STATE_NEED_BUFFER = 2, 62 63 /* The request is waiting for zcopy_start to finish */ 64 TCP_REQUEST_STATE_AWAITING_ZCOPY_START = 3, 65 66 /* The request has received a zero-copy buffer */ 67 TCP_REQUEST_STATE_ZCOPY_START_COMPLETED = 4, 68 69 /* The request is currently transferring data from the host to the controller. */ 70 TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER = 5, 71 72 /* The request is waiting for the R2T send acknowledgement. */ 73 TCP_REQUEST_STATE_AWAITING_R2T_ACK = 6, 74 75 /* The request is ready to execute at the block device */ 76 TCP_REQUEST_STATE_READY_TO_EXECUTE = 7, 77 78 /* The request is currently executing at the block device */ 79 TCP_REQUEST_STATE_EXECUTING = 8, 80 81 /* The request is waiting for zcopy buffers to be committed */ 82 TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT = 9, 83 84 /* The request finished executing at the block device */ 85 TCP_REQUEST_STATE_EXECUTED = 10, 86 87 /* The request is ready to send a completion */ 88 TCP_REQUEST_STATE_READY_TO_COMPLETE = 11, 89 90 /* The request is currently transferring final pdus from the controller to the host. */ 91 TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST = 12, 92 93 /* The request is waiting for zcopy buffers to be released (without committing) */ 94 TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE = 13, 95 96 /* The request completed and can be marked free. 
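	 * If a response PDU write is still in flight, the free is deferred until
	 * _req_pdu_write_done() sees the COMPLETED state and releases the request.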
 */
	TCP_REQUEST_STATE_COMPLETED = 14,

	/* Terminator */
	TCP_REQUEST_NUM_STATES,
};

static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
	"Invalid PDU Header Field",
	"PDU Sequence Error",
	"Header Digest Error",
	"Data Transfer Out of Range",
	"R2T Limit Exceeded",
	"Unsupported parameter",
};

SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
{
	spdk_trace_register_owner(OWNER_NVMF_TCP, 't');
	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
	spdk_trace_register_description("TCP_REQ_NEW",
					TRACE_TCP_REQUEST_STATE_NEW,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 1,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_START",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_ZCPY_START_CPL",
					TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_EXECUTING",
					TRACE_TCP_REQUEST_STATE_EXECUTING,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_CMT",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_EXECUTED",
					TRACE_TCP_REQUEST_STATE_EXECUTED,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_AWAIT_ZCPY_RLS",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_COMPLETED",
					TRACE_TCP_REQUEST_STATE_COMPLETED,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_WRITE_START",
					TRACE_TCP_FLUSH_WRITEBUF_START,
					OWNER_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_WRITE_DONE",
					TRACE_TCP_FLUSH_WRITEBUF_DONE,
					OWNER_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_READ_DONE",
					TRACE_TCP_READ_FROM_SOCKET_DONE,
					OWNER_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 182 OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0, 183 SPDK_TRACE_ARG_TYPE_PTR, "qpair"); 184 185 spdk_trace_register_description("TCP_QP_CREATE", TRACE_TCP_QP_CREATE, 186 OWNER_NVMF_TCP, OBJECT_NONE, 0, 187 SPDK_TRACE_ARG_TYPE_INT, ""); 188 spdk_trace_register_description("TCP_QP_SOCK_INIT", TRACE_TCP_QP_SOCK_INIT, 189 OWNER_NVMF_TCP, OBJECT_NONE, 0, 190 SPDK_TRACE_ARG_TYPE_INT, ""); 191 spdk_trace_register_description("TCP_QP_STATE_CHANGE", TRACE_TCP_QP_STATE_CHANGE, 192 OWNER_NVMF_TCP, OBJECT_NONE, 0, 193 SPDK_TRACE_ARG_TYPE_INT, "state"); 194 spdk_trace_register_description("TCP_QP_DISCONNECT", TRACE_TCP_QP_DISCONNECT, 195 OWNER_NVMF_TCP, OBJECT_NONE, 0, 196 SPDK_TRACE_ARG_TYPE_INT, ""); 197 spdk_trace_register_description("TCP_QP_DESTROY", TRACE_TCP_QP_DESTROY, 198 OWNER_NVMF_TCP, OBJECT_NONE, 0, 199 SPDK_TRACE_ARG_TYPE_INT, ""); 200 spdk_trace_register_description("TCP_QP_ABORT_REQ", TRACE_TCP_QP_ABORT_REQ, 201 OWNER_NVMF_TCP, OBJECT_NONE, 0, 202 SPDK_TRACE_ARG_TYPE_PTR, "qpair"); 203 spdk_trace_register_description("TCP_QP_RCV_STATE_CHANGE", TRACE_TCP_QP_RCV_STATE_CHANGE, 204 OWNER_NVMF_TCP, OBJECT_NONE, 0, 205 SPDK_TRACE_ARG_TYPE_INT, "state"); 206 207 spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_NVMF_TCP_IO, 1); 208 spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_NVMF_TCP_IO, 0); 209 } 210 211 struct spdk_nvmf_tcp_req { 212 struct spdk_nvmf_request req; 213 struct spdk_nvme_cpl rsp; 214 struct spdk_nvme_cmd cmd; 215 216 /* A PDU that can be used for sending responses. This is 217 * not the incoming PDU! */ 218 struct nvme_tcp_pdu *pdu; 219 220 /* In-capsule data buffer */ 221 uint8_t *buf; 222 223 struct spdk_nvmf_tcp_req *fused_pair; 224 225 /* 226 * The PDU for a request may be used multiple times in serial over 227 * the request's lifetime. For example, first to send an R2T, then 228 * to send a completion. To catch mistakes where the PDU is used 229 * twice at the same time, add a debug flag here for init/fini. 230 */ 231 bool pdu_in_use; 232 bool has_in_capsule_data; 233 bool fused_failed; 234 235 /* transfer_tag */ 236 uint16_t ttag; 237 238 enum spdk_nvmf_tcp_req_state state; 239 240 /* 241 * h2c_offset is used when we receive the h2c_data PDU. 242 */ 243 uint32_t h2c_offset; 244 245 STAILQ_ENTRY(spdk_nvmf_tcp_req) link; 246 TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link; 247 }; 248 249 struct spdk_nvmf_tcp_qpair { 250 struct spdk_nvmf_qpair qpair; 251 struct spdk_nvmf_tcp_poll_group *group; 252 struct spdk_sock *sock; 253 254 enum nvme_tcp_pdu_recv_state recv_state; 255 enum nvme_tcp_qpair_state state; 256 257 /* PDU being actively received */ 258 struct nvme_tcp_pdu *pdu_in_progress; 259 260 struct spdk_nvmf_tcp_req *fused_first; 261 262 /* Queues to track the requests in all states */ 263 TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_working_queue; 264 TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_free_queue; 265 SLIST_HEAD(, nvme_tcp_pdu) tcp_pdu_free_queue; 266 /* Number of working pdus */ 267 uint32_t tcp_pdu_working_count; 268 269 /* Number of requests in each state */ 270 uint32_t state_cntr[TCP_REQUEST_NUM_STATES]; 271 272 uint8_t cpda; 273 274 bool host_hdgst_enable; 275 bool host_ddgst_enable; 276 277 /* This is a spare PDU used for sending special management 278 * operations. Primarily, this is used for the initial 279 * connection response and c2h termination request. */ 280 struct nvme_tcp_pdu *mgmt_pdu; 281 282 /* Arrays of in-capsule buffers, requests, and pdus. 
283 * Each array is 'resource_count' number of elements */ 284 void *bufs; 285 struct spdk_nvmf_tcp_req *reqs; 286 struct nvme_tcp_pdu *pdus; 287 uint32_t resource_count; 288 uint32_t recv_buf_size; 289 290 struct spdk_nvmf_tcp_port *port; 291 292 /* IP address */ 293 char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 294 char target_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 295 296 /* IP port */ 297 uint16_t initiator_port; 298 uint16_t target_port; 299 300 /* Timer used to destroy qpair after detecting transport error issue if initiator does 301 * not close the connection. 302 */ 303 struct spdk_poller *timeout_poller; 304 305 spdk_nvmf_transport_qpair_fini_cb fini_cb_fn; 306 void *fini_cb_arg; 307 308 TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; 309 }; 310 311 struct spdk_nvmf_tcp_control_msg { 312 STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link; 313 }; 314 315 struct spdk_nvmf_tcp_control_msg_list { 316 void *msg_buf; 317 STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs; 318 }; 319 320 struct spdk_nvmf_tcp_poll_group { 321 struct spdk_nvmf_transport_poll_group group; 322 struct spdk_sock_group *sock_group; 323 324 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; 325 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req; 326 327 struct spdk_io_channel *accel_channel; 328 struct spdk_nvmf_tcp_control_msg_list *control_msg_list; 329 330 TAILQ_ENTRY(spdk_nvmf_tcp_poll_group) link; 331 }; 332 333 struct spdk_nvmf_tcp_port { 334 const struct spdk_nvme_transport_id *trid; 335 struct spdk_sock *listen_sock; 336 TAILQ_ENTRY(spdk_nvmf_tcp_port) link; 337 }; 338 339 struct tcp_transport_opts { 340 bool c2h_success; 341 uint16_t control_msg_num; 342 uint32_t sock_priority; 343 }; 344 345 struct tcp_psk_entry { 346 char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 347 char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 348 char psk_identity[NVMF_PSK_IDENTITY_LEN]; 349 uint8_t psk[SPDK_TLS_PSK_MAX_LEN]; 350 uint32_t psk_size; 351 TAILQ_ENTRY(tcp_psk_entry) link; 352 }; 353 354 struct spdk_nvmf_tcp_transport { 355 struct spdk_nvmf_transport transport; 356 struct tcp_transport_opts tcp_opts; 357 358 struct spdk_nvmf_tcp_poll_group *next_pg; 359 360 struct spdk_poller *accept_poller; 361 362 TAILQ_HEAD(, spdk_nvmf_tcp_port) ports; 363 TAILQ_HEAD(, spdk_nvmf_tcp_poll_group) poll_groups; 364 365 TAILQ_HEAD(, tcp_psk_entry) psks; 366 }; 367 368 static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = { 369 { 370 "c2h_success", offsetof(struct tcp_transport_opts, c2h_success), 371 spdk_json_decode_bool, true 372 }, 373 { 374 "control_msg_num", offsetof(struct tcp_transport_opts, control_msg_num), 375 spdk_json_decode_uint16, true 376 }, 377 { 378 "sock_priority", offsetof(struct tcp_transport_opts, sock_priority), 379 spdk_json_decode_uint32, true 380 }, 381 }; 382 383 static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 384 struct spdk_nvmf_tcp_req *tcp_req); 385 static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); 386 387 static void _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 388 struct spdk_nvmf_tcp_req *tcp_req); 389 390 static inline void 391 nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, 392 enum spdk_nvmf_tcp_req_state state) 393 { 394 struct spdk_nvmf_qpair *qpair; 395 struct spdk_nvmf_tcp_qpair *tqpair; 396 397 qpair = tcp_req->req.qpair; 398 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 399 400 assert(tqpair->state_cntr[tcp_req->state] > 0); 401 tqpair->state_cntr[tcp_req->state]--; 402 tqpair->state_cntr[state]++; 403 404 
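	/* Commit the state transition. The per-state counters updated above feed the
	 * teardown sanity check in _nvmf_tcp_qpair_destroy(), the error-path dump in
	 * nvmf_tcp_dump_qpair_req_contents(), and the request-exhaustion handling in
	 * nvmf_tcp_capsule_cmd_hdr_handle(). */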
tcp_req->state = state; 405 } 406 407 static inline struct nvme_tcp_pdu * 408 nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req) 409 { 410 assert(tcp_req->pdu_in_use == false); 411 412 memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu)); 413 tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 414 415 return tcp_req->pdu; 416 } 417 418 static struct spdk_nvmf_tcp_req * 419 nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair) 420 { 421 struct spdk_nvmf_tcp_req *tcp_req; 422 423 tcp_req = TAILQ_FIRST(&tqpair->tcp_req_free_queue); 424 if (spdk_unlikely(!tcp_req)) { 425 return NULL; 426 } 427 428 memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp)); 429 tcp_req->h2c_offset = 0; 430 tcp_req->has_in_capsule_data = false; 431 tcp_req->req.dif_enabled = false; 432 tcp_req->req.zcopy_phase = NVMF_ZCOPY_PHASE_NONE; 433 434 TAILQ_REMOVE(&tqpair->tcp_req_free_queue, tcp_req, state_link); 435 TAILQ_INSERT_TAIL(&tqpair->tcp_req_working_queue, tcp_req, state_link); 436 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 437 return tcp_req; 438 } 439 440 static inline void 441 nvmf_tcp_req_put(struct spdk_nvmf_tcp_qpair *tqpair, struct spdk_nvmf_tcp_req *tcp_req) 442 { 443 assert(!tcp_req->pdu_in_use); 444 445 TAILQ_REMOVE(&tqpair->tcp_req_working_queue, tcp_req, state_link); 446 TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link); 447 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE); 448 } 449 450 static void 451 nvmf_tcp_request_free(void *cb_arg) 452 { 453 struct spdk_nvmf_tcp_transport *ttransport; 454 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 455 456 assert(tcp_req != NULL); 457 458 SPDK_DEBUGLOG(nvmf_tcp, "tcp_req=%p will be freed\n", tcp_req); 459 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 460 struct spdk_nvmf_tcp_transport, transport); 461 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 462 nvmf_tcp_req_process(ttransport, tcp_req); 463 } 464 465 static int 466 nvmf_tcp_req_free(struct spdk_nvmf_request *req) 467 { 468 struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 469 470 nvmf_tcp_request_free(tcp_req); 471 472 return 0; 473 } 474 475 static void 476 nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair, 477 enum spdk_nvmf_tcp_req_state state) 478 { 479 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 480 481 assert(state != TCP_REQUEST_STATE_FREE); 482 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) { 483 if (state == tcp_req->state) { 484 nvmf_tcp_request_free(tcp_req); 485 } 486 } 487 } 488 489 static void 490 nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) 491 { 492 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 493 494 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 495 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW); 496 497 /* Wipe the requests waiting for buffer from the global list */ 498 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) { 499 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { 500 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req, 501 spdk_nvmf_request, buf_link); 502 } 503 } 504 505 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); 506 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); 507 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 508 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 
509 } 510 511 static void 512 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair) 513 { 514 int i; 515 struct spdk_nvmf_tcp_req *tcp_req; 516 517 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid); 518 for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) { 519 SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]); 520 TAILQ_FOREACH(tcp_req, &tqpair->tcp_req_working_queue, state_link) { 521 if ((int)tcp_req->state == i) { 522 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool); 523 SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode); 524 } 525 } 526 } 527 } 528 529 static void 530 _nvmf_tcp_qpair_destroy(void *_tqpair) 531 { 532 struct spdk_nvmf_tcp_qpair *tqpair = _tqpair; 533 spdk_nvmf_transport_qpair_fini_cb cb_fn = tqpair->fini_cb_fn; 534 void *cb_arg = tqpair->fini_cb_arg; 535 int err = 0; 536 537 spdk_trace_record(TRACE_TCP_QP_DESTROY, 0, 0, (uintptr_t)tqpair); 538 539 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 540 541 err = spdk_sock_close(&tqpair->sock); 542 assert(err == 0); 543 nvmf_tcp_cleanup_all_states(tqpair); 544 545 if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) { 546 SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair, 547 tqpair->state_cntr[TCP_REQUEST_STATE_FREE], 548 tqpair->resource_count); 549 err++; 550 } 551 552 if (err > 0) { 553 nvmf_tcp_dump_qpair_req_contents(tqpair); 554 } 555 556 /* The timeout poller might still be registered here if we close the qpair before host 557 * terminates the connection. 558 */ 559 spdk_poller_unregister(&tqpair->timeout_poller); 560 spdk_dma_free(tqpair->pdus); 561 free(tqpair->reqs); 562 spdk_free(tqpair->bufs); 563 free(tqpair); 564 565 if (cb_fn != NULL) { 566 cb_fn(cb_arg); 567 } 568 569 SPDK_DEBUGLOG(nvmf_tcp, "Leave\n"); 570 } 571 572 static void 573 nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair) 574 { 575 /* Delay the destruction to make sure it isn't performed from the context of a sock 576 * callback. Otherwise, spdk_sock_close() might not abort pending requests, causing their 577 * completions to be executed after the qpair is freed. (Note: this fixed issue #2471.) 
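	 * Sending a message to the current thread defers the actual teardown until after
	 * the socket callback has returned.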
578 */ 579 spdk_thread_send_msg(spdk_get_thread(), _nvmf_tcp_qpair_destroy, tqpair); 580 } 581 582 static void 583 nvmf_tcp_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w) 584 { 585 struct spdk_nvmf_tcp_transport *ttransport; 586 assert(w != NULL); 587 588 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 589 spdk_json_write_named_bool(w, "c2h_success", ttransport->tcp_opts.c2h_success); 590 spdk_json_write_named_uint32(w, "sock_priority", ttransport->tcp_opts.sock_priority); 591 } 592 593 static int 594 nvmf_tcp_destroy(struct spdk_nvmf_transport *transport, 595 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 596 { 597 struct spdk_nvmf_tcp_transport *ttransport; 598 struct tcp_psk_entry *entry, *tmp; 599 600 assert(transport != NULL); 601 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 602 603 TAILQ_FOREACH_SAFE(entry, &ttransport->psks, link, tmp) { 604 TAILQ_REMOVE(&ttransport->psks, entry, link); 605 free(entry); 606 } 607 608 spdk_poller_unregister(&ttransport->accept_poller); 609 free(ttransport); 610 611 if (cb_fn) { 612 cb_fn(cb_arg); 613 } 614 return 0; 615 } 616 617 static int nvmf_tcp_accept(void *ctx); 618 619 static struct spdk_nvmf_transport * 620 nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts) 621 { 622 struct spdk_nvmf_tcp_transport *ttransport; 623 uint32_t sge_count; 624 uint32_t min_shared_buffers; 625 626 ttransport = calloc(1, sizeof(*ttransport)); 627 if (!ttransport) { 628 return NULL; 629 } 630 631 TAILQ_INIT(&ttransport->ports); 632 TAILQ_INIT(&ttransport->poll_groups); 633 TAILQ_INIT(&ttransport->psks); 634 635 ttransport->transport.ops = &spdk_nvmf_transport_tcp; 636 637 ttransport->tcp_opts.c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION; 638 ttransport->tcp_opts.sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY; 639 ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM; 640 if (opts->transport_specific != NULL && 641 spdk_json_decode_object_relaxed(opts->transport_specific, tcp_transport_opts_decoder, 642 SPDK_COUNTOF(tcp_transport_opts_decoder), 643 &ttransport->tcp_opts)) { 644 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 645 free(ttransport); 646 return NULL; 647 } 648 649 SPDK_NOTICELOG("*** TCP Transport Init ***\n"); 650 651 SPDK_INFOLOG(nvmf_tcp, "*** TCP Transport Init ***\n" 652 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" 653 " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" 654 " in_capsule_data_size=%d, max_aq_depth=%d\n" 655 " num_shared_buffers=%d, c2h_success=%d,\n" 656 " dif_insert_or_strip=%d, sock_priority=%d\n" 657 " abort_timeout_sec=%d, control_msg_num=%hu\n", 658 opts->max_queue_depth, 659 opts->max_io_size, 660 opts->max_qpairs_per_ctrlr - 1, 661 opts->io_unit_size, 662 opts->in_capsule_data_size, 663 opts->max_aq_depth, 664 opts->num_shared_buffers, 665 ttransport->tcp_opts.c2h_success, 666 opts->dif_insert_or_strip, 667 ttransport->tcp_opts.sock_priority, 668 opts->abort_timeout_sec, 669 ttransport->tcp_opts.control_msg_num); 670 671 if (ttransport->tcp_opts.sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) { 672 SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n" 673 "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n", 674 ttransport->tcp_opts.sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY); 675 free(ttransport); 676 return NULL; 677 } 678 679 if (ttransport->tcp_opts.control_msg_num == 0 && 
	    opts->in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		SPDK_WARNLOG("TCP param control_msg_num can't be 0 if ICD is less than %u bytes. Using default value %u\n",
			     SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM);
		ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
	}

	/* I/O unit size cannot be larger than max I/O size */
	if (opts->io_unit_size > opts->max_io_size) {
		SPDK_WARNLOG("TCP param io_unit_size %u can't be larger than max_io_size %u. Using max_io_size as io_unit_size\n",
			     opts->io_unit_size, opts->max_io_size);
		opts->io_unit_size = opts->max_io_size;
	}

	/* In capsule data size cannot be larger than max I/O size */
	if (opts->in_capsule_data_size > opts->max_io_size) {
		SPDK_WARNLOG("TCP param ICD size %u can't be larger than max_io_size %u. Using max_io_size as ICD size\n",
			     opts->in_capsule_data_size, opts->max_io_size);
		opts->in_capsule_data_size = opts->max_io_size;
	}

	/* max IO queue depth cannot be smaller than 2 or larger than 65535.
	 * There is no need to check SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH here, because
	 * max_queue_depth is a 16-bit value and can never exceed 64k. */
	if (opts->max_queue_depth < SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH) {
		SPDK_WARNLOG("TCP param max_queue_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
			     opts->max_queue_depth, SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH,
			     SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH);
		opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
	}

	/* max admin queue depth cannot be smaller than 2 or larger than 4096 */
	if (opts->max_aq_depth < SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH ||
	    opts->max_aq_depth > SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH) {
		SPDK_WARNLOG("TCP param max_aq_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
			     opts->max_aq_depth, SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH,
			     SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH);
		opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
	}

	sge_count = opts->max_io_size / opts->io_unit_size;
	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
		free(ttransport);
		return NULL;
	}

	/* If buf_cache_size == UINT32_MAX, we will dynamically pick a cache size later that we know will fit. */
	if (opts->buf_cache_size < UINT32_MAX) {
		min_shared_buffers = spdk_env_get_core_count() * opts->buf_cache_size;
		if (min_shared_buffers > opts->num_shared_buffers) {
			SPDK_ERRLOG("There are not enough buffers to satisfy "
				    "per-poll group caches for each thread. (%" PRIu32 ") "
				    "supplied. 
(%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); 732 SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); 733 free(ttransport); 734 return NULL; 735 } 736 } 737 738 ttransport->accept_poller = SPDK_POLLER_REGISTER(nvmf_tcp_accept, &ttransport->transport, 739 opts->acceptor_poll_rate); 740 if (!ttransport->accept_poller) { 741 free(ttransport); 742 return NULL; 743 } 744 745 return &ttransport->transport; 746 } 747 748 static int 749 nvmf_tcp_trsvcid_to_int(const char *trsvcid) 750 { 751 unsigned long long ull; 752 char *end = NULL; 753 754 ull = strtoull(trsvcid, &end, 10); 755 if (end == NULL || end == trsvcid || *end != '\0') { 756 return -1; 757 } 758 759 /* Valid TCP/IP port numbers are in [0, 65535] */ 760 if (ull > 65535) { 761 return -1; 762 } 763 764 return (int)ull; 765 } 766 767 /** 768 * Canonicalize a listen address trid. 769 */ 770 static int 771 nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid, 772 const struct spdk_nvme_transport_id *trid) 773 { 774 int trsvcid_int; 775 776 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); 777 if (trsvcid_int < 0) { 778 return -EINVAL; 779 } 780 781 memset(canon_trid, 0, sizeof(*canon_trid)); 782 spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP); 783 canon_trid->adrfam = trid->adrfam; 784 snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr); 785 snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int); 786 787 return 0; 788 } 789 790 /** 791 * Find an existing listening port. 792 */ 793 static struct spdk_nvmf_tcp_port * 794 nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport, 795 const struct spdk_nvme_transport_id *trid) 796 { 797 struct spdk_nvme_transport_id canon_trid; 798 struct spdk_nvmf_tcp_port *port; 799 800 if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) { 801 return NULL; 802 } 803 804 TAILQ_FOREACH(port, &ttransport->ports, link) { 805 if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) { 806 return port; 807 } 808 } 809 810 return NULL; 811 } 812 813 static int 814 tcp_sock_get_key(uint8_t *out, int out_len, const char **cipher, const char *psk_identity, 815 void *get_key_ctx) 816 { 817 struct tcp_psk_entry *entry; 818 struct spdk_nvmf_tcp_transport *ttransport = get_key_ctx; 819 size_t psk_len; 820 int rc; 821 822 *cipher = NULL; 823 824 TAILQ_FOREACH(entry, &ttransport->psks, link) { 825 if (strcmp(psk_identity, entry->psk_identity) != 0) { 826 continue; 827 } 828 829 psk_len = entry->psk_size; 830 if ((size_t)out_len < psk_len) { 831 SPDK_ERRLOG("Out buffer of size: %" PRIu32 " cannot fit PSK of len: %lu\n", 832 out_len, psk_len); 833 return -ENOBUFS; 834 } 835 836 /* Convert PSK to the TLS PSK format. 
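		 * nvme_tcp_derive_tls_psk() derives the TLS PSK handed to the sock layer
		 * from the configured pre-shared key and the PSK identity.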
*/ 837 rc = nvme_tcp_derive_tls_psk(entry->psk, psk_len, psk_identity, out, out_len); 838 if (rc < 0) { 839 SPDK_ERRLOG("Could not generate TLS PSK\n"); 840 } 841 842 return rc; 843 } 844 845 SPDK_ERRLOG("Could not find PSK for identity: %s\n", psk_identity); 846 847 return -EINVAL; 848 } 849 850 static int 851 nvmf_tcp_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid, 852 struct spdk_nvmf_listen_opts *listen_opts) 853 { 854 struct spdk_nvmf_tcp_transport *ttransport; 855 struct spdk_nvmf_tcp_port *port; 856 int trsvcid_int; 857 uint8_t adrfam; 858 const char *sock_impl_name; 859 struct spdk_sock_impl_opts impl_opts; 860 size_t impl_opts_size = sizeof(impl_opts); 861 struct spdk_sock_opts opts; 862 863 if (!strlen(trid->trsvcid)) { 864 SPDK_ERRLOG("Service id is required\n"); 865 return -EINVAL; 866 } 867 868 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 869 870 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); 871 if (trsvcid_int < 0) { 872 SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid); 873 return -EINVAL; 874 } 875 876 port = calloc(1, sizeof(*port)); 877 if (!port) { 878 SPDK_ERRLOG("Port allocation failed\n"); 879 return -ENOMEM; 880 } 881 882 port->trid = trid; 883 884 sock_impl_name = NULL; 885 886 opts.opts_size = sizeof(opts); 887 spdk_sock_get_default_opts(&opts); 888 opts.priority = ttransport->tcp_opts.sock_priority; 889 if (listen_opts->secure_channel) { 890 sock_impl_name = "ssl"; 891 spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size); 892 impl_opts.tls_version = SPDK_TLS_VERSION_1_3; 893 impl_opts.get_key = tcp_sock_get_key; 894 impl_opts.get_key_ctx = ttransport; 895 opts.impl_opts = &impl_opts; 896 opts.impl_opts_size = sizeof(impl_opts); 897 } 898 899 port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int, 900 sock_impl_name, &opts); 901 if (port->listen_sock == NULL) { 902 SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n", 903 trid->traddr, trsvcid_int, 904 spdk_strerror(errno), errno); 905 free(port); 906 return -errno; 907 } 908 909 if (spdk_sock_is_ipv4(port->listen_sock)) { 910 adrfam = SPDK_NVMF_ADRFAM_IPV4; 911 } else if (spdk_sock_is_ipv6(port->listen_sock)) { 912 adrfam = SPDK_NVMF_ADRFAM_IPV6; 913 } else { 914 SPDK_ERRLOG("Unhandled socket type\n"); 915 adrfam = 0; 916 } 917 918 if (adrfam != trid->adrfam) { 919 SPDK_ERRLOG("Socket address family mismatch\n"); 920 spdk_sock_close(&port->listen_sock); 921 free(port); 922 return -EINVAL; 923 } 924 925 SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n", 926 trid->traddr, trid->trsvcid); 927 928 TAILQ_INSERT_TAIL(&ttransport->ports, port, link); 929 return 0; 930 } 931 932 static void 933 nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport, 934 const struct spdk_nvme_transport_id *trid) 935 { 936 struct spdk_nvmf_tcp_transport *ttransport; 937 struct spdk_nvmf_tcp_port *port; 938 939 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 940 941 SPDK_DEBUGLOG(nvmf_tcp, "Removing listen address %s port %s\n", 942 trid->traddr, trid->trsvcid); 943 944 port = nvmf_tcp_find_port(ttransport, trid); 945 if (port) { 946 TAILQ_REMOVE(&ttransport->ports, port, link); 947 spdk_sock_close(&port->listen_sock); 948 free(port); 949 } 950 } 951 952 static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 953 enum nvme_tcp_pdu_recv_state state); 954 955 static void 956 nvmf_tcp_qpair_set_state(struct spdk_nvmf_tcp_qpair *tqpair, enum 
nvme_tcp_qpair_state state) 957 { 958 tqpair->state = state; 959 spdk_trace_record(TRACE_TCP_QP_STATE_CHANGE, tqpair->qpair.qid, 0, (uintptr_t)tqpair, 960 tqpair->state); 961 } 962 963 static void 964 nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair) 965 { 966 SPDK_DEBUGLOG(nvmf_tcp, "Disconnecting qpair %p\n", tqpair); 967 968 spdk_trace_record(TRACE_TCP_QP_DISCONNECT, 0, 0, (uintptr_t)tqpair); 969 970 if (tqpair->state <= NVME_TCP_QPAIR_STATE_RUNNING) { 971 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_EXITING); 972 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 973 spdk_poller_unregister(&tqpair->timeout_poller); 974 975 /* This will end up calling nvmf_tcp_close_qpair */ 976 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 977 } 978 } 979 980 static void 981 _mgmt_pdu_write_done(void *_tqpair, int err) 982 { 983 struct spdk_nvmf_tcp_qpair *tqpair = _tqpair; 984 struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu; 985 986 if (spdk_unlikely(err != 0)) { 987 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 988 return; 989 } 990 991 assert(pdu->cb_fn != NULL); 992 pdu->cb_fn(pdu->cb_arg); 993 } 994 995 static void 996 _req_pdu_write_done(void *req, int err) 997 { 998 struct spdk_nvmf_tcp_req *tcp_req = req; 999 struct nvme_tcp_pdu *pdu = tcp_req->pdu; 1000 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 1001 1002 assert(tcp_req->pdu_in_use); 1003 tcp_req->pdu_in_use = false; 1004 1005 /* If the request is in a completed state, we're waiting for write completion to free it */ 1006 if (spdk_unlikely(tcp_req->state == TCP_REQUEST_STATE_COMPLETED)) { 1007 nvmf_tcp_request_free(tcp_req); 1008 return; 1009 } 1010 1011 if (spdk_unlikely(err != 0)) { 1012 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1013 return; 1014 } 1015 1016 assert(pdu->cb_fn != NULL); 1017 pdu->cb_fn(pdu->cb_arg); 1018 } 1019 1020 static void 1021 _pdu_write_done(struct nvme_tcp_pdu *pdu, int err) 1022 { 1023 pdu->sock_req.cb_fn(pdu->sock_req.cb_arg, err); 1024 } 1025 1026 static void 1027 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 1028 { 1029 int rc; 1030 uint32_t mapped_length; 1031 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 1032 1033 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, 1034 tqpair->host_hdgst_enable, tqpair->host_ddgst_enable, &mapped_length); 1035 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 1036 1037 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || 1038 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) { 1039 /* Try to force the send immediately. */ 1040 rc = spdk_sock_flush(tqpair->sock); 1041 if (rc > 0 && (uint32_t)rc == mapped_length) { 1042 _pdu_write_done(pdu, 0); 1043 } else { 1044 SPDK_ERRLOG("Could not write %s to socket: rc=%d, errno=%d\n", 1045 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ? 1046 "IC_RESP" : "TERM_REQ", rc, errno); 1047 _pdu_write_done(pdu, rc >= 0 ? 
-EAGAIN : -errno); 1048 } 1049 } 1050 } 1051 1052 static void 1053 data_crc32_accel_done(void *cb_arg, int status) 1054 { 1055 struct nvme_tcp_pdu *pdu = cb_arg; 1056 1057 if (spdk_unlikely(status)) { 1058 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1059 _pdu_write_done(pdu, status); 1060 return; 1061 } 1062 1063 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1064 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1065 1066 _tcp_write_pdu(pdu); 1067 } 1068 1069 static void 1070 pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu) 1071 { 1072 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 1073 int rc = 0; 1074 1075 /* Data Digest */ 1076 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) { 1077 /* Only support this limitated case for the first step */ 1078 if (spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0) 1079 && tqpair->group)) { 1080 rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov, 1081 pdu->data_iovcnt, 0, data_crc32_accel_done, pdu); 1082 if (spdk_likely(rc == 0)) { 1083 return; 1084 } 1085 } else { 1086 pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu); 1087 } 1088 data_crc32_accel_done(pdu, rc); 1089 } else { 1090 _tcp_write_pdu(pdu); 1091 } 1092 } 1093 1094 static void 1095 nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1096 struct nvme_tcp_pdu *pdu, 1097 nvme_tcp_qpair_xfer_complete_cb cb_fn, 1098 void *cb_arg) 1099 { 1100 int hlen; 1101 uint32_t crc32c; 1102 1103 assert(tqpair->pdu_in_progress != pdu); 1104 1105 hlen = pdu->hdr.common.hlen; 1106 pdu->cb_fn = cb_fn; 1107 pdu->cb_arg = cb_arg; 1108 1109 pdu->iov[0].iov_base = &pdu->hdr.raw; 1110 pdu->iov[0].iov_len = hlen; 1111 1112 /* Header Digest */ 1113 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) { 1114 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1115 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 1116 } 1117 1118 /* Data Digest */ 1119 pdu_data_crc32_compute(pdu); 1120 } 1121 1122 static void 1123 nvmf_tcp_qpair_write_mgmt_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1124 nvme_tcp_qpair_xfer_complete_cb cb_fn, 1125 void *cb_arg) 1126 { 1127 struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu; 1128 1129 pdu->sock_req.cb_fn = _mgmt_pdu_write_done; 1130 pdu->sock_req.cb_arg = tqpair; 1131 1132 nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg); 1133 } 1134 1135 static void 1136 nvmf_tcp_qpair_write_req_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1137 struct spdk_nvmf_tcp_req *tcp_req, 1138 nvme_tcp_qpair_xfer_complete_cb cb_fn, 1139 void *cb_arg) 1140 { 1141 struct nvme_tcp_pdu *pdu = tcp_req->pdu; 1142 1143 pdu->sock_req.cb_fn = _req_pdu_write_done; 1144 pdu->sock_req.cb_arg = tcp_req; 1145 1146 assert(!tcp_req->pdu_in_use); 1147 tcp_req->pdu_in_use = true; 1148 1149 nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg); 1150 } 1151 1152 static int 1153 nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair) 1154 { 1155 uint32_t i; 1156 struct spdk_nvmf_transport_opts *opts; 1157 uint32_t in_capsule_data_size; 1158 1159 opts = &tqpair->qpair.transport->opts; 1160 1161 in_capsule_data_size = opts->in_capsule_data_size; 1162 if (opts->dif_insert_or_strip) { 1163 in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size); 1164 } 1165 1166 tqpair->resource_count = opts->max_queue_depth; 1167 1168 tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs)); 1169 if (!tqpair->reqs) { 
1170 SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair); 1171 return -1; 1172 } 1173 1174 if (in_capsule_data_size) { 1175 tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000, 1176 NULL, SPDK_ENV_LCORE_ID_ANY, 1177 SPDK_MALLOC_DMA); 1178 if (!tqpair->bufs) { 1179 SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair); 1180 return -1; 1181 } 1182 } 1183 /* prepare memory space for receiving pdus and tcp_req */ 1184 /* Add additional 1 member, which will be used for mgmt_pdu owned by the tqpair */ 1185 tqpair->pdus = spdk_dma_zmalloc((2 * tqpair->resource_count + 1) * sizeof(*tqpair->pdus), 0x1000, 1186 NULL); 1187 if (!tqpair->pdus) { 1188 SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair); 1189 return -1; 1190 } 1191 1192 for (i = 0; i < tqpair->resource_count; i++) { 1193 struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i]; 1194 1195 tcp_req->ttag = i + 1; 1196 tcp_req->req.qpair = &tqpair->qpair; 1197 1198 tcp_req->pdu = &tqpair->pdus[i]; 1199 tcp_req->pdu->qpair = tqpair; 1200 1201 /* Set up memory to receive commands */ 1202 if (tqpair->bufs) { 1203 tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size)); 1204 } 1205 1206 /* Set the cmdn and rsp */ 1207 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp; 1208 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd; 1209 1210 tcp_req->req.stripped_data = NULL; 1211 1212 /* Initialize request state to FREE */ 1213 tcp_req->state = TCP_REQUEST_STATE_FREE; 1214 TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link); 1215 tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++; 1216 } 1217 1218 for (; i < 2 * tqpair->resource_count; i++) { 1219 struct nvme_tcp_pdu *pdu = &tqpair->pdus[i]; 1220 1221 pdu->qpair = tqpair; 1222 SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist); 1223 } 1224 1225 tqpair->mgmt_pdu = &tqpair->pdus[i]; 1226 tqpair->mgmt_pdu->qpair = tqpair; 1227 tqpair->pdu_in_progress = SLIST_FIRST(&tqpair->tcp_pdu_free_queue); 1228 SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist); 1229 tqpair->tcp_pdu_working_count = 1; 1230 1231 tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 * 1232 SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 1233 1234 return 0; 1235 } 1236 1237 static int 1238 nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair) 1239 { 1240 struct spdk_nvmf_tcp_qpair *tqpair; 1241 1242 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1243 1244 SPDK_DEBUGLOG(nvmf_tcp, "New TCP Connection: %p\n", qpair); 1245 1246 spdk_trace_record(TRACE_TCP_QP_CREATE, 0, 0, (uintptr_t)tqpair); 1247 1248 /* Initialise request state queues of the qpair */ 1249 TAILQ_INIT(&tqpair->tcp_req_free_queue); 1250 TAILQ_INIT(&tqpair->tcp_req_working_queue); 1251 SLIST_INIT(&tqpair->tcp_pdu_free_queue); 1252 1253 tqpair->host_hdgst_enable = true; 1254 tqpair->host_ddgst_enable = true; 1255 1256 return 0; 1257 } 1258 1259 static int 1260 nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair) 1261 { 1262 int rc; 1263 1264 spdk_trace_record(TRACE_TCP_QP_SOCK_INIT, 0, 0, (uintptr_t)tqpair); 1265 1266 /* set low water mark */ 1267 rc = spdk_sock_set_recvlowat(tqpair->sock, 1); 1268 if (rc != 0) { 1269 SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n"); 1270 return rc; 1271 } 1272 1273 return 0; 1274 } 1275 1276 static void 1277 nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport, 1278 struct spdk_nvmf_tcp_port *port, 1279 struct spdk_sock *sock) 1280 { 1281 struct 
spdk_nvmf_tcp_qpair *tqpair; 1282 int rc; 1283 1284 SPDK_DEBUGLOG(nvmf_tcp, "New connection accepted on %s port %s\n", 1285 port->trid->traddr, port->trid->trsvcid); 1286 1287 tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair)); 1288 if (tqpair == NULL) { 1289 SPDK_ERRLOG("Could not allocate new connection.\n"); 1290 spdk_sock_close(&sock); 1291 return; 1292 } 1293 1294 tqpair->sock = sock; 1295 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0; 1296 tqpair->port = port; 1297 tqpair->qpair.transport = transport; 1298 1299 rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr, 1300 sizeof(tqpair->target_addr), &tqpair->target_port, 1301 tqpair->initiator_addr, sizeof(tqpair->initiator_addr), 1302 &tqpair->initiator_port); 1303 if (rc < 0) { 1304 SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair); 1305 nvmf_tcp_qpair_destroy(tqpair); 1306 return; 1307 } 1308 1309 spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair); 1310 } 1311 1312 static uint32_t 1313 nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port) 1314 { 1315 struct spdk_sock *sock; 1316 uint32_t count = 0; 1317 int i; 1318 1319 for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) { 1320 sock = spdk_sock_accept(port->listen_sock); 1321 if (sock == NULL) { 1322 break; 1323 } 1324 count++; 1325 nvmf_tcp_handle_connect(transport, port, sock); 1326 } 1327 1328 return count; 1329 } 1330 1331 static int 1332 nvmf_tcp_accept(void *ctx) 1333 { 1334 struct spdk_nvmf_transport *transport = ctx; 1335 struct spdk_nvmf_tcp_transport *ttransport; 1336 struct spdk_nvmf_tcp_port *port; 1337 uint32_t count = 0; 1338 1339 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1340 1341 TAILQ_FOREACH(port, &ttransport->ports, link) { 1342 count += nvmf_tcp_port_accept(transport, port); 1343 } 1344 1345 return count > 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 1346 } 1347 1348 static void 1349 nvmf_tcp_discover(struct spdk_nvmf_transport *transport, 1350 struct spdk_nvme_transport_id *trid, 1351 struct spdk_nvmf_discovery_log_page_entry *entry) 1352 { 1353 entry->trtype = SPDK_NVMF_TRTYPE_TCP; 1354 entry->adrfam = trid->adrfam; 1355 entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED; 1356 1357 spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); 1358 spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); 1359 1360 entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE; 1361 } 1362 1363 static struct spdk_nvmf_tcp_control_msg_list * 1364 nvmf_tcp_control_msg_list_create(uint16_t num_messages) 1365 { 1366 struct spdk_nvmf_tcp_control_msg_list *list; 1367 struct spdk_nvmf_tcp_control_msg *msg; 1368 uint16_t i; 1369 1370 list = calloc(1, sizeof(*list)); 1371 if (!list) { 1372 SPDK_ERRLOG("Failed to allocate memory for list structure\n"); 1373 return NULL; 1374 } 1375 1376 list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, 1377 NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 1378 if (!list->msg_buf) { 1379 SPDK_ERRLOG("Failed to allocate memory for control message buffers\n"); 1380 free(list); 1381 return NULL; 1382 } 1383 1384 STAILQ_INIT(&list->free_msgs); 1385 1386 for (i = 0; i < num_messages; i++) { 1387 msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i * 1388 SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); 1389 STAILQ_INSERT_TAIL(&list->free_msgs, msg, link); 1390 } 1391 1392 return list; 1393 } 1394 1395 static void 1396 nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list) 1397 { 1398 if (!list) { 1399 return; 1400 } 1401 1402 spdk_free(list->msg_buf); 1403 free(list); 1404 } 1405 1406 static struct spdk_nvmf_transport_poll_group * 1407 nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport, 1408 struct spdk_nvmf_poll_group *group) 1409 { 1410 struct spdk_nvmf_tcp_transport *ttransport; 1411 struct spdk_nvmf_tcp_poll_group *tgroup; 1412 1413 tgroup = calloc(1, sizeof(*tgroup)); 1414 if (!tgroup) { 1415 return NULL; 1416 } 1417 1418 tgroup->sock_group = spdk_sock_group_create(&tgroup->group); 1419 if (!tgroup->sock_group) { 1420 goto cleanup; 1421 } 1422 1423 TAILQ_INIT(&tgroup->qpairs); 1424 TAILQ_INIT(&tgroup->await_req); 1425 1426 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1427 1428 if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { 1429 SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). 
" 1430 "Creating control messages list\n", transport->opts.in_capsule_data_size, 1431 SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); 1432 tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(ttransport->tcp_opts.control_msg_num); 1433 if (!tgroup->control_msg_list) { 1434 goto cleanup; 1435 } 1436 } 1437 1438 tgroup->accel_channel = spdk_accel_get_io_channel(); 1439 if (spdk_unlikely(!tgroup->accel_channel)) { 1440 SPDK_ERRLOG("Cannot create accel_channel for tgroup=%p\n", tgroup); 1441 goto cleanup; 1442 } 1443 1444 TAILQ_INSERT_TAIL(&ttransport->poll_groups, tgroup, link); 1445 if (ttransport->next_pg == NULL) { 1446 ttransport->next_pg = tgroup; 1447 } 1448 1449 return &tgroup->group; 1450 1451 cleanup: 1452 nvmf_tcp_poll_group_destroy(&tgroup->group); 1453 return NULL; 1454 } 1455 1456 static struct spdk_nvmf_transport_poll_group * 1457 nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) 1458 { 1459 struct spdk_nvmf_tcp_transport *ttransport; 1460 struct spdk_nvmf_tcp_poll_group **pg; 1461 struct spdk_nvmf_tcp_qpair *tqpair; 1462 struct spdk_sock_group *group = NULL, *hint = NULL; 1463 int rc; 1464 1465 ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport); 1466 1467 if (TAILQ_EMPTY(&ttransport->poll_groups)) { 1468 return NULL; 1469 } 1470 1471 pg = &ttransport->next_pg; 1472 assert(*pg != NULL); 1473 hint = (*pg)->sock_group; 1474 1475 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1476 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, hint); 1477 if (rc != 0) { 1478 return NULL; 1479 } else if (group != NULL) { 1480 /* Optimal poll group was found */ 1481 return spdk_sock_group_get_ctx(group); 1482 } 1483 1484 /* The hint was used for optimal poll group, advance next_pg. 
*/ 1485 *pg = TAILQ_NEXT(*pg, link); 1486 if (*pg == NULL) { 1487 *pg = TAILQ_FIRST(&ttransport->poll_groups); 1488 } 1489 1490 return spdk_sock_group_get_ctx(hint); 1491 } 1492 1493 static void 1494 nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1495 { 1496 struct spdk_nvmf_tcp_poll_group *tgroup, *next_tgroup; 1497 struct spdk_nvmf_tcp_transport *ttransport; 1498 1499 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 1500 spdk_sock_group_close(&tgroup->sock_group); 1501 if (tgroup->control_msg_list) { 1502 nvmf_tcp_control_msg_list_free(tgroup->control_msg_list); 1503 } 1504 1505 if (tgroup->accel_channel) { 1506 spdk_put_io_channel(tgroup->accel_channel); 1507 } 1508 1509 ttransport = SPDK_CONTAINEROF(tgroup->group.transport, struct spdk_nvmf_tcp_transport, transport); 1510 1511 next_tgroup = TAILQ_NEXT(tgroup, link); 1512 TAILQ_REMOVE(&ttransport->poll_groups, tgroup, link); 1513 if (next_tgroup == NULL) { 1514 next_tgroup = TAILQ_FIRST(&ttransport->poll_groups); 1515 } 1516 if (ttransport->next_pg == tgroup) { 1517 ttransport->next_pg = next_tgroup; 1518 } 1519 1520 free(tgroup); 1521 } 1522 1523 static void 1524 nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 1525 enum nvme_tcp_pdu_recv_state state) 1526 { 1527 if (tqpair->recv_state == state) { 1528 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 1529 tqpair, state); 1530 return; 1531 } 1532 1533 if (spdk_unlikely(state == NVME_TCP_PDU_RECV_STATE_QUIESCING)) { 1534 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH && tqpair->pdu_in_progress) { 1535 SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, tqpair->pdu_in_progress, slist); 1536 tqpair->tcp_pdu_working_count--; 1537 } 1538 } 1539 1540 if (spdk_unlikely(state == NVME_TCP_PDU_RECV_STATE_ERROR)) { 1541 assert(tqpair->tcp_pdu_working_count == 0); 1542 } 1543 1544 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 1545 /* When leaving the await req state, move the qpair to the main list */ 1546 TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link); 1547 TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link); 1548 } else if (state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 1549 TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link); 1550 TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link); 1551 } 1552 1553 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv state=%d\n", tqpair, state); 1554 tqpair->recv_state = state; 1555 1556 spdk_trace_record(TRACE_TCP_QP_RCV_STATE_CHANGE, tqpair->qpair.qid, 0, (uintptr_t)tqpair, 1557 tqpair->recv_state); 1558 } 1559 1560 static int 1561 nvmf_tcp_qpair_handle_timeout(void *ctx) 1562 { 1563 struct spdk_nvmf_tcp_qpair *tqpair = ctx; 1564 1565 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 1566 1567 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, 1568 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); 1569 1570 nvmf_tcp_qpair_disconnect(tqpair); 1571 return SPDK_POLLER_BUSY; 1572 } 1573 1574 static void 1575 nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) 1576 { 1577 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; 1578 1579 if (!tqpair->timeout_poller) { 1580 tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair, 1581 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); 1582 } 1583 } 1584 1585 static void 1586 nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1587 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1588 
{ 1589 struct nvme_tcp_pdu *rsp_pdu; 1590 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; 1591 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); 1592 uint32_t copy_len; 1593 1594 rsp_pdu = tqpair->mgmt_pdu; 1595 1596 c2h_term_req = &rsp_pdu->hdr.term_req; 1597 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; 1598 c2h_term_req->common.hlen = c2h_term_req_hdr_len; 1599 c2h_term_req->fes = fes; 1600 1601 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1602 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1603 DSET32(&c2h_term_req->fei, error_offset); 1604 } 1605 1606 copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE); 1607 1608 /* Copy the error info into the buffer */ 1609 memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len); 1610 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len); 1611 1612 /* Contain the header of the wrong received pdu */ 1613 c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len; 1614 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1615 nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_c2h_term_req_complete, tqpair); 1616 } 1617 1618 static void 1619 nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1620 struct spdk_nvmf_tcp_qpair *tqpair, 1621 struct nvme_tcp_pdu *pdu) 1622 { 1623 struct spdk_nvmf_tcp_req *tcp_req; 1624 1625 assert(pdu->psh_valid_bytes == pdu->psh_len); 1626 assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD); 1627 1628 tcp_req = nvmf_tcp_req_get(tqpair); 1629 if (!tcp_req) { 1630 /* Directly return and make the allocation retry again. This can happen if we're 1631 * using asynchronous writes to send the response to the host or when releasing 1632 * zero-copy buffers after a response has been sent. In both cases, the host might 1633 * receive the response before we've finished processing the request and is free to 1634 * send another one. 1635 */ 1636 if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0 || 1637 tqpair->state_cntr[TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE] > 0) { 1638 return; 1639 } 1640 1641 /* The host sent more commands than the maximum queue depth. 
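		 * This is treated as fatal: log the error and quiesce the connection.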
*/ 1642 SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair); 1643 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1644 return; 1645 } 1646 1647 pdu->req = tcp_req; 1648 assert(tcp_req->state == TCP_REQUEST_STATE_NEW); 1649 nvmf_tcp_req_process(ttransport, tcp_req); 1650 } 1651 1652 static void 1653 nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1654 struct spdk_nvmf_tcp_qpair *tqpair, 1655 struct nvme_tcp_pdu *pdu) 1656 { 1657 struct spdk_nvmf_tcp_req *tcp_req; 1658 struct spdk_nvme_tcp_cmd *capsule_cmd; 1659 uint32_t error_offset = 0; 1660 enum spdk_nvme_tcp_term_req_fes fes; 1661 struct spdk_nvme_cpl *rsp; 1662 1663 capsule_cmd = &pdu->hdr.capsule_cmd; 1664 tcp_req = pdu->req; 1665 assert(tcp_req != NULL); 1666 1667 /* Zero-copy requests don't support ICD */ 1668 assert(!spdk_nvmf_request_using_zcopy(&tcp_req->req)); 1669 1670 if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) { 1671 SPDK_ERRLOG("Expected ICReq capsule_cmd pdu offset <= %d, got %c\n", 1672 SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo); 1673 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1674 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 1675 goto err; 1676 } 1677 1678 rsp = &tcp_req->req.rsp->nvme_cpl; 1679 if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) { 1680 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 1681 } else { 1682 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1683 } 1684 1685 nvmf_tcp_req_process(ttransport, tcp_req); 1686 1687 return; 1688 err: 1689 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1690 } 1691 1692 static void 1693 nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport, 1694 struct spdk_nvmf_tcp_qpair *tqpair, 1695 struct nvme_tcp_pdu *pdu) 1696 { 1697 struct spdk_nvmf_tcp_req *tcp_req; 1698 uint32_t error_offset = 0; 1699 enum spdk_nvme_tcp_term_req_fes fes = 0; 1700 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1701 1702 h2c_data = &pdu->hdr.h2c_data; 1703 1704 SPDK_DEBUGLOG(nvmf_tcp, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n", 1705 tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag); 1706 1707 if (h2c_data->ttag > tqpair->resource_count) { 1708 SPDK_DEBUGLOG(nvmf_tcp, "ttag %u is larger than allowed %u.\n", h2c_data->ttag, 1709 tqpair->resource_count); 1710 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1711 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); 1712 goto err; 1713 } 1714 1715 tcp_req = &tqpair->reqs[h2c_data->ttag - 1]; 1716 1717 if (spdk_unlikely(tcp_req->state != TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER && 1718 tcp_req->state != TCP_REQUEST_STATE_AWAITING_R2T_ACK)) { 1719 SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, has error state in %d\n", tcp_req, tqpair, 1720 tcp_req->state); 1721 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1722 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag); 1723 goto err; 1724 } 1725 1726 if (spdk_unlikely(tcp_req->req.cmd->nvme_cmd.cid != h2c_data->cccid)) { 1727 SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, expected %u but %u for cccid.\n", tcp_req, tqpair, 1728 tcp_req->req.cmd->nvme_cmd.cid, h2c_data->cccid); 1729 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1730 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid); 1731 goto err; 1732 } 1733 1734 if (tcp_req->h2c_offset != h2c_data->datao) 
{ 1735 SPDK_DEBUGLOG(nvmf_tcp, 1736 "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n", 1737 tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao); 1738 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1739 goto err; 1740 } 1741 1742 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { 1743 SPDK_DEBUGLOG(nvmf_tcp, 1744 "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n", 1745 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); 1746 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1747 goto err; 1748 } 1749 1750 pdu->req = tcp_req; 1751 1752 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 1753 pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 1754 } 1755 1756 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 1757 h2c_data->datao, h2c_data->datal); 1758 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1759 return; 1760 1761 err: 1762 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1763 } 1764 1765 static void 1766 nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, 1767 struct spdk_nvmf_tcp_qpair *tqpair) 1768 { 1769 struct nvme_tcp_pdu *rsp_pdu; 1770 struct spdk_nvme_tcp_rsp *capsule_resp; 1771 1772 SPDK_DEBUGLOG(nvmf_tcp, "enter, tqpair=%p\n", tqpair); 1773 1774 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 1775 assert(rsp_pdu != NULL); 1776 1777 capsule_resp = &rsp_pdu->hdr.capsule_resp; 1778 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1779 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); 1780 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; 1781 if (tqpair->host_hdgst_enable) { 1782 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1783 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1784 } 1785 1786 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_request_free, tcp_req); 1787 } 1788 1789 static void 1790 nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) 1791 { 1792 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1793 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, 1794 struct spdk_nvmf_tcp_qpair, qpair); 1795 1796 assert(tqpair != NULL); 1797 1798 if (spdk_unlikely(tcp_req->pdu->rw_offset < tcp_req->req.length)) { 1799 SPDK_DEBUGLOG(nvmf_tcp, "sending another C2H part, offset %u length %u\n", tcp_req->pdu->rw_offset, 1800 tcp_req->req.length); 1801 _nvmf_tcp_send_c2h_data(tqpair, tcp_req); 1802 return; 1803 } 1804 1805 if (tcp_req->pdu->hdr.c2h_data.common.flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 1806 nvmf_tcp_request_free(tcp_req); 1807 } else { 1808 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 1809 } 1810 } 1811 1812 static void 1813 nvmf_tcp_r2t_complete(void *cb_arg) 1814 { 1815 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1816 struct spdk_nvmf_tcp_transport *ttransport; 1817 1818 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 1819 struct spdk_nvmf_tcp_transport, transport); 1820 1821 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 1822 1823 if (tcp_req->h2c_offset == tcp_req->req.length) { 1824 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1825 nvmf_tcp_req_process(ttransport, tcp_req); 1826 } 1827 } 1828 1829 static void 1830 nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1831 struct spdk_nvmf_tcp_req *tcp_req) 1832 { 1833 struct nvme_tcp_pdu *rsp_pdu; 1834 struct spdk_nvme_tcp_r2t_hdr *r2t; 1835 1836 rsp_pdu = 
nvmf_tcp_req_pdu_init(tcp_req); 1837 assert(rsp_pdu != NULL); 1838 1839 r2t = &rsp_pdu->hdr.r2t; 1840 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; 1841 r2t->common.plen = r2t->common.hlen = sizeof(*r2t); 1842 1843 if (tqpair->host_hdgst_enable) { 1844 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1845 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1846 } 1847 1848 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; 1849 r2t->ttag = tcp_req->ttag; 1850 r2t->r2to = tcp_req->h2c_offset; 1851 r2t->r2tl = tcp_req->req.length; 1852 1853 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 1854 1855 SPDK_DEBUGLOG(nvmf_tcp, 1856 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", 1857 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); 1858 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_r2t_complete, tcp_req); 1859 } 1860 1861 static void 1862 nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1863 struct spdk_nvmf_tcp_qpair *tqpair, 1864 struct nvme_tcp_pdu *pdu) 1865 { 1866 struct spdk_nvmf_tcp_req *tcp_req; 1867 struct spdk_nvme_cpl *rsp; 1868 1869 tcp_req = pdu->req; 1870 assert(tcp_req != NULL); 1871 1872 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 1873 1874 tcp_req->h2c_offset += pdu->data_len; 1875 1876 /* Wait for all of the data to arrive AND for the initial R2T PDU send to be 1877 * acknowledged before moving on. */ 1878 if (tcp_req->h2c_offset == tcp_req->req.length && 1879 tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) { 1880 /* After receiving all the h2c data, we need to check whether there is 1881 * transient transport error */ 1882 rsp = &tcp_req->req.rsp->nvme_cpl; 1883 if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) { 1884 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 1885 } else { 1886 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1887 } 1888 nvmf_tcp_req_process(ttransport, tcp_req); 1889 } 1890 } 1891 1892 static void 1893 nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) 1894 { 1895 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, 1896 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); 1897 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1898 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1899 SPDK_DEBUGLOG(nvmf_tcp, "The offset from the start of the PDU header is %u\n", 1900 DGET32(h2c_term_req->fei)); 1901 } 1902 } 1903 1904 static void 1905 nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1906 struct nvme_tcp_pdu *pdu) 1907 { 1908 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1909 uint32_t error_offset = 0; 1910 enum spdk_nvme_tcp_term_req_fes fes; 1911 1912 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1913 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for h2c_term_req pdu=%p\n", pdu); 1914 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1915 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1916 goto end; 1917 } 1918 1919 /* set the data buffer */ 1920 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, 1921 h2c_term_req->common.plen - h2c_term_req->common.hlen); 1922 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1923 return; 1924 end: 1925 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1926 } 
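/*
 * Illustrative sizing note for the two termination-request handlers above and
 * below (figures assume the spec-defined term-req layout): the header handler
 * arms a read of plen - hlen bytes of optional error data, and the payload
 * handler then dumps that data and quiesces the qpair.  With a 24-byte
 * term-req header and plen capped at SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE
 * (enforced in nvmf_tcp_pdu_ch_handle()), at most 128 bytes of error data
 * remain to be read.
 */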
1927 1928 static void 1929 nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1930 struct nvme_tcp_pdu *pdu) 1931 { 1932 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1933 1934 nvmf_tcp_h2c_term_req_dump(h2c_term_req); 1935 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1936 } 1937 1938 static void 1939 _nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1940 { 1941 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 1942 struct spdk_nvmf_tcp_transport, transport); 1943 1944 switch (pdu->hdr.common.pdu_type) { 1945 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1946 nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu); 1947 break; 1948 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1949 nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu); 1950 break; 1951 1952 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1953 nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu); 1954 break; 1955 1956 default: 1957 /* The code should never reach here */ 1958 SPDK_ERRLOG("ERROR pdu type %d\n", pdu->hdr.common.pdu_type); 1959 break; 1960 } 1961 SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist); 1962 tqpair->tcp_pdu_working_count--; 1963 } 1964 1965 static void 1966 data_crc32_calc_done(void *cb_arg, int status) 1967 { 1968 struct nvme_tcp_pdu *pdu = cb_arg; 1969 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 1970 struct spdk_nvmf_tcp_req *tcp_req; 1971 struct spdk_nvme_cpl *rsp; 1972 1973 /* If the async crc32 calculation failed, fall back to a synchronous calculation for the check */ 1974 if (spdk_unlikely(status)) { 1975 SPDK_ERRLOG("Data digest on tqpair=(%p) with pdu=%p failed to be calculated asynchronously\n", 1976 tqpair, pdu); 1977 pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu); 1978 } 1979 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1980 if (!MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32)) { 1981 SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1982 tcp_req = pdu->req; 1983 assert(tcp_req != NULL); 1984 rsp = &tcp_req->req.rsp->nvme_cpl; 1985 rsp->status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1986 } 1987 _nvmf_tcp_pdu_payload_handle(tqpair, pdu); 1988 } 1989 1990 static void 1991 nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1992 { 1993 int rc = 0; 1994 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1995 tqpair->pdu_in_progress = NULL; 1996 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1997 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 1998 /* check the data digest if needed */ 1999 if (pdu->ddgst_enable) { 2000 if (tqpair->qpair.qid != 0 && !pdu->dif_ctx && tqpair->group && 2001 (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)) { 2002 rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov, 2003 pdu->data_iovcnt, 0, data_crc32_calc_done, pdu); 2004 if (spdk_likely(rc == 0)) { 2005 return; 2006 } 2007 } else { 2008 pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu); 2009 } 2010 data_crc32_calc_done(pdu, rc); 2011 } else { 2012 _nvmf_tcp_pdu_payload_handle(tqpair, pdu); 2013 } 2014 } 2015 2016 static void 2017 nvmf_tcp_send_icresp_complete(void *cb_arg) 2018 { 2019 struct spdk_nvmf_tcp_qpair *tqpair = cb_arg; 2020 2021 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_RUNNING); 2022 } 2023 2024 static void 2025 nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport
*ttransport, 2026 struct spdk_nvmf_tcp_qpair *tqpair, 2027 struct nvme_tcp_pdu *pdu) 2028 { 2029 struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; 2030 struct nvme_tcp_pdu *rsp_pdu; 2031 struct spdk_nvme_tcp_ic_resp *ic_resp; 2032 uint32_t error_offset = 0; 2033 enum spdk_nvme_tcp_term_req_fes fes; 2034 2035 /* Only PFV 0 is defined currently */ 2036 if (ic_req->pfv != 0) { 2037 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); 2038 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2039 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv); 2040 goto end; 2041 } 2042 2043 /* This value is 0’s based value in units of dwords should not be larger than SPDK_NVME_TCP_HPDA_MAX */ 2044 if (ic_req->hpda > SPDK_NVME_TCP_HPDA_MAX) { 2045 SPDK_ERRLOG("ICReq HPDA out of range 0 to 31, got %u\n", ic_req->hpda); 2046 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2047 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, hpda); 2048 goto end; 2049 } 2050 2051 /* MAXR2T is 0's based */ 2052 SPDK_DEBUGLOG(nvmf_tcp, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); 2053 2054 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; 2055 if (!tqpair->host_hdgst_enable) { 2056 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 2057 } 2058 2059 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; 2060 if (!tqpair->host_ddgst_enable) { 2061 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 2062 } 2063 2064 tqpair->recv_buf_size = spdk_max(tqpair->recv_buf_size, MIN_SOCK_PIPE_SIZE); 2065 /* Now that we know whether digests are enabled, properly size the receive buffer */ 2066 if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) { 2067 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 2068 tqpair, 2069 tqpair->recv_buf_size); 2070 /* Not fatal. */ 2071 } 2072 2073 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); 2074 SPDK_DEBUGLOG(nvmf_tcp, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); 2075 2076 rsp_pdu = tqpair->mgmt_pdu; 2077 2078 ic_resp = &rsp_pdu->hdr.ic_resp; 2079 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; 2080 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); 2081 ic_resp->pfv = 0; 2082 ic_resp->cpda = tqpair->cpda; 2083 ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size; 2084 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; 2085 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 
1 : 0; 2086 2087 SPDK_DEBUGLOG(nvmf_tcp, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); 2088 SPDK_DEBUGLOG(nvmf_tcp, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); 2089 2090 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_INITIALIZING); 2091 nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_icresp_complete, tqpair); 2092 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2093 return; 2094 end: 2095 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2096 } 2097 2098 static void 2099 nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair, 2100 struct spdk_nvmf_tcp_transport *ttransport) 2101 { 2102 struct nvme_tcp_pdu *pdu; 2103 int rc; 2104 uint32_t crc32c, error_offset = 0; 2105 enum spdk_nvme_tcp_term_req_fes fes; 2106 2107 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 2108 pdu = tqpair->pdu_in_progress; 2109 2110 SPDK_DEBUGLOG(nvmf_tcp, "pdu type of tqpair(%p) is %d\n", tqpair, 2111 pdu->hdr.common.pdu_type); 2112 /* check header digest if needed */ 2113 if (pdu->has_hdgst) { 2114 SPDK_DEBUGLOG(nvmf_tcp, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); 2115 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 2116 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 2117 if (rc == 0) { 2118 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 2119 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 2120 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2121 return; 2122 2123 } 2124 } 2125 2126 switch (pdu->hdr.common.pdu_type) { 2127 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: 2128 nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); 2129 break; 2130 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 2131 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ); 2132 break; 2133 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 2134 nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); 2135 break; 2136 2137 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 2138 nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); 2139 break; 2140 2141 default: 2142 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress->hdr.common.pdu_type); 2143 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2144 error_offset = 1; 2145 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2146 break; 2147 } 2148 } 2149 2150 static void 2151 nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) 2152 { 2153 struct nvme_tcp_pdu *pdu; 2154 uint32_t error_offset = 0; 2155 enum spdk_nvme_tcp_term_req_fes fes; 2156 uint8_t expected_hlen, pdo; 2157 bool plen_error = false, pdo_error = false; 2158 2159 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 2160 pdu = tqpair->pdu_in_progress; 2161 assert(pdu); 2162 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { 2163 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 2164 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); 2165 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 2166 goto err; 2167 } 2168 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); 2169 if (pdu->hdr.common.plen != expected_hlen) { 2170 plen_error = true; 2171 } 2172 } else { 2173 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 2174 SPDK_ERRLOG("The TCP/IP connection is not negotiated\n"); 2175 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 2176 goto err; 2177 } 2178 2179 switch (pdu->hdr.common.pdu_type) { 2180 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 2181 expected_hlen = sizeof(struct 
spdk_nvme_tcp_cmd); 2182 pdo = pdu->hdr.common.pdo; 2183 if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) { 2184 pdo_error = true; 2185 break; 2186 } 2187 2188 if (pdu->hdr.common.plen < expected_hlen) { 2189 plen_error = true; 2190 } 2191 break; 2192 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 2193 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); 2194 pdo = pdu->hdr.common.pdo; 2195 if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) { 2196 pdo_error = true; 2197 break; 2198 } 2199 if (pdu->hdr.common.plen < expected_hlen) { 2200 plen_error = true; 2201 } 2202 break; 2203 2204 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 2205 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 2206 if ((pdu->hdr.common.plen <= expected_hlen) || 2207 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 2208 plen_error = true; 2209 } 2210 break; 2211 2212 default: 2213 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); 2214 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2215 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 2216 goto err; 2217 } 2218 } 2219 2220 if (pdu->hdr.common.hlen != expected_hlen) { 2221 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", 2222 pdu->hdr.common.pdu_type, 2223 expected_hlen, pdu->hdr.common.hlen, tqpair); 2224 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2225 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 2226 goto err; 2227 } else if (pdo_error) { 2228 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2229 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 2230 } else if (plen_error) { 2231 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2232 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 2233 goto err; 2234 } else { 2235 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 2236 nvme_tcp_pdu_calc_psh_len(tqpair->pdu_in_progress, tqpair->host_hdgst_enable); 2237 return; 2238 } 2239 err: 2240 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2241 } 2242 2243 static int 2244 nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) 2245 { 2246 int rc = 0; 2247 struct nvme_tcp_pdu *pdu; 2248 enum nvme_tcp_pdu_recv_state prev_state; 2249 uint32_t data_len; 2250 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 2251 struct spdk_nvmf_tcp_transport, transport); 2252 2253 /* The loop here is to allow for several back-to-back state changes. 
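 * A single socket event can therefore carry one PDU through the CH, PSH and
 * payload stages back to back.  The loop exits once a full pass leaves
 * recv_state unchanged; handlers that still need more bytes from the socket
 * return NVME_TCP_PDU_IN_PROGRESS directly instead.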
*/ 2254 do { 2255 prev_state = tqpair->recv_state; 2256 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); 2257 2258 pdu = tqpair->pdu_in_progress; 2259 assert(pdu || tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2260 switch (tqpair->recv_state) { 2261 /* Wait for the common header */ 2262 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 2263 if (!pdu) { 2264 pdu = SLIST_FIRST(&tqpair->tcp_pdu_free_queue); 2265 if (spdk_unlikely(!pdu)) { 2266 return NVME_TCP_PDU_IN_PROGRESS; 2267 } 2268 SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist); 2269 tqpair->pdu_in_progress = pdu; 2270 tqpair->tcp_pdu_working_count++; 2271 } 2272 memset(pdu, 0, offsetof(struct nvme_tcp_pdu, qpair)); 2273 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 2274 /* FALLTHROUGH */ 2275 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 2276 if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 2277 return rc; 2278 } 2279 2280 rc = nvme_tcp_read_data(tqpair->sock, 2281 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 2282 (void *)&pdu->hdr.common + pdu->ch_valid_bytes); 2283 if (rc < 0) { 2284 SPDK_DEBUGLOG(nvmf_tcp, "will disconnect tqpair=%p\n", tqpair); 2285 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2286 break; 2287 } else if (rc > 0) { 2288 pdu->ch_valid_bytes += rc; 2289 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair); 2290 } 2291 2292 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 2293 return NVME_TCP_PDU_IN_PROGRESS; 2294 } 2295 2296 /* The command header of this PDU has now been read from the socket. */ 2297 nvmf_tcp_pdu_ch_handle(tqpair); 2298 break; 2299 /* Wait for the pdu specific header */ 2300 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 2301 rc = nvme_tcp_read_data(tqpair->sock, 2302 pdu->psh_len - pdu->psh_valid_bytes, 2303 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 2304 if (rc < 0) { 2305 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2306 break; 2307 } else if (rc > 0) { 2308 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair); 2309 pdu->psh_valid_bytes += rc; 2310 } 2311 2312 if (pdu->psh_valid_bytes < pdu->psh_len) { 2313 return NVME_TCP_PDU_IN_PROGRESS; 2314 } 2315 2316 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. 
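 * nvmf_tcp_pdu_psh_handle() verifies the header digest (when one is present)
 * and then dispatches on the PDU type.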
*/ 2317 nvmf_tcp_pdu_psh_handle(tqpair, ttransport); 2318 break; 2319 /* Wait for the req slot */ 2320 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: 2321 nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); 2322 break; 2323 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 2324 /* check whether the data is valid, if not we just return */ 2325 if (!pdu->data_len) { 2326 return NVME_TCP_PDU_IN_PROGRESS; 2327 } 2328 2329 data_len = pdu->data_len; 2330 /* data digest */ 2331 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && 2332 tqpair->host_ddgst_enable)) { 2333 data_len += SPDK_NVME_TCP_DIGEST_LEN; 2334 pdu->ddgst_enable = true; 2335 } 2336 2337 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 2338 if (rc < 0) { 2339 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2340 break; 2341 } 2342 pdu->rw_offset += rc; 2343 2344 if (pdu->rw_offset < data_len) { 2345 return NVME_TCP_PDU_IN_PROGRESS; 2346 } 2347 2348 /* Generate and insert DIF to whole data block received if DIF is enabled */ 2349 if (spdk_unlikely(pdu->dif_ctx != NULL) && 2350 spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 0, data_len, 2351 pdu->dif_ctx) != 0) { 2352 SPDK_ERRLOG("DIF generate failed\n"); 2353 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 2354 break; 2355 } 2356 2357 /* All of this PDU has now been read from the socket. */ 2358 nvmf_tcp_pdu_payload_handle(tqpair, pdu); 2359 break; 2360 case NVME_TCP_PDU_RECV_STATE_QUIESCING: 2361 if (tqpair->tcp_pdu_working_count != 0) { 2362 return NVME_TCP_PDU_IN_PROGRESS; 2363 } 2364 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 2365 break; 2366 case NVME_TCP_PDU_RECV_STATE_ERROR: 2367 if (!spdk_sock_is_connected(tqpair->sock)) { 2368 return NVME_TCP_PDU_FATAL; 2369 } 2370 return NVME_TCP_PDU_IN_PROGRESS; 2371 default: 2372 SPDK_ERRLOG("The state(%d) is invalid\n", tqpair->recv_state); 2373 abort(); 2374 break; 2375 } 2376 } while (tqpair->recv_state != prev_state); 2377 2378 return rc; 2379 } 2380 2381 static inline void * 2382 nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list) 2383 { 2384 struct spdk_nvmf_tcp_control_msg *msg; 2385 2386 assert(list); 2387 2388 msg = STAILQ_FIRST(&list->free_msgs); 2389 if (!msg) { 2390 SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n"); 2391 return NULL; 2392 } 2393 STAILQ_REMOVE_HEAD(&list->free_msgs, link); 2394 return msg; 2395 } 2396 2397 static inline void 2398 nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg) 2399 { 2400 struct spdk_nvmf_tcp_control_msg *msg = _msg; 2401 2402 assert(list); 2403 STAILQ_INSERT_HEAD(&list->free_msgs, msg, link); 2404 } 2405 2406 static int 2407 nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, 2408 struct spdk_nvmf_transport *transport, 2409 struct spdk_nvmf_transport_poll_group *group) 2410 { 2411 struct spdk_nvmf_request *req = &tcp_req->req; 2412 struct spdk_nvme_cmd *cmd; 2413 struct spdk_nvme_sgl_descriptor *sgl; 2414 struct spdk_nvmf_tcp_poll_group *tgroup; 2415 enum spdk_nvme_tcp_term_req_fes fes; 2416 struct nvme_tcp_pdu *pdu; 2417 struct spdk_nvmf_tcp_qpair *tqpair; 2418 uint32_t length, error_offset = 0; 2419 2420 cmd = &req->cmd->nvme_cmd; 2421 sgl = &cmd->dptr.sgl1; 2422 2423 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && 2424 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { 2425 /* get request length from sgl */ 2426 length = sgl->unkeyed.length; 2427 if (spdk_unlikely(length > 
transport->opts.max_io_size)) { 2428 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 2429 length, transport->opts.max_io_size); 2430 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED; 2431 goto fatal_err; 2432 } 2433 2434 /* fill request length and populate iovs */ 2435 req->length = length; 2436 2437 SPDK_DEBUGLOG(nvmf_tcp, "Data requested length= 0x%x\n", length); 2438 2439 if (spdk_unlikely(req->dif_enabled)) { 2440 req->dif.orig_length = length; 2441 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2442 req->dif.elba_length = length; 2443 } 2444 2445 if (nvmf_ctrlr_use_zcopy(req)) { 2446 SPDK_DEBUGLOG(nvmf_tcp, "Using zero-copy to execute request %p\n", tcp_req); 2447 req->data_from_pool = false; 2448 return 0; 2449 } 2450 2451 if (spdk_nvmf_request_get_buffers(req, group, transport, length)) { 2452 /* No available buffers. Queue this request up. */ 2453 SPDK_DEBUGLOG(nvmf_tcp, "No available large data buffers. Queueing request %p\n", 2454 tcp_req); 2455 return 0; 2456 } 2457 2458 /* backward compatible */ 2459 req->data = req->iov[0].iov_base; 2460 2461 SPDK_DEBUGLOG(nvmf_tcp, "Request %p took %d buffer/s from central pool, and data=%p\n", 2462 tcp_req, req->iovcnt, req->iov[0].iov_base); 2463 2464 return 0; 2465 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 2466 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 2467 uint64_t offset = sgl->address; 2468 uint32_t max_len = transport->opts.in_capsule_data_size; 2469 2470 assert(tcp_req->has_in_capsule_data); 2471 /* Capsule Cmd with In-capsule Data should get data length from pdu header */ 2472 tqpair = tcp_req->pdu->qpair; 2473 /* receiving pdu is not same with the pdu in tcp_req */ 2474 pdu = tqpair->pdu_in_progress; 2475 length = pdu->hdr.common.plen - pdu->psh_len - sizeof(struct spdk_nvme_tcp_common_pdu_hdr); 2476 if (tqpair->host_ddgst_enable) { 2477 length -= SPDK_NVME_TCP_DIGEST_LEN; 2478 } 2479 /* This error is not defined in NVMe/TCP spec, take this error as fatal error */ 2480 if (spdk_unlikely(length != sgl->unkeyed.length)) { 2481 SPDK_ERRLOG("In-Capsule Data length 0x%x is not equal to SGL data length 0x%x\n", 2482 length, sgl->unkeyed.length); 2483 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2484 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 2485 goto fatal_err; 2486 } 2487 2488 SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 2489 offset, length); 2490 2491 /* The NVMe/TCP transport does not use ICDOFF to control the in-capsule data offset. 
ICDOFF should be '0' */ 2492 if (spdk_unlikely(offset != 0)) { 2493 /* This is not defined as a fatal error in the NVMe/TCP spec, but handle it as one */ 2494 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " should be ZERO in NVMe/TCP\n", offset); 2495 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 2496 error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.address); 2497 goto fatal_err; 2498 } 2499 2500 if (spdk_unlikely(length > max_len)) { 2501 /* According to the spec, we should support ICD of up to 8192 bytes for admin and fabrics commands */ 2502 if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE && 2503 (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) { 2504 2505 /* Get a buffer from the dedicated list */ 2506 SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n"); 2507 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2508 assert(tgroup->control_msg_list); 2509 req->iov[0].iov_base = nvmf_tcp_control_msg_get(tgroup->control_msg_list); 2510 if (!req->iov[0].iov_base) { 2511 /* No available buffers. Queue this request up. */ 2512 SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req); 2513 return 0; 2514 } 2515 } else { 2516 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 2517 length, max_len); 2518 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED; 2519 goto fatal_err; 2520 } 2521 } else { 2522 req->iov[0].iov_base = tcp_req->buf; 2523 } 2524 2525 req->length = length; 2526 req->data_from_pool = false; 2527 req->data = req->iov[0].iov_base; 2528 2529 if (spdk_unlikely(req->dif_enabled)) { 2530 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2531 req->dif.elba_length = length; 2532 } 2533 2534 req->iov[0].iov_len = length; 2535 req->iovcnt = 1; 2536 2537 return 0; 2538 } 2539 /* Handling the problem gracefully here would still require consuming the data segment that follows, 2540 * but this function runs before that data has been read, so treat all SGL errors as fatal.
*/ 2541 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 2542 sgl->generic.type, sgl->generic.subtype); 2543 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 2544 error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.generic); 2545 fatal_err: 2546 nvmf_tcp_send_c2h_term_req(tcp_req->pdu->qpair, tcp_req->pdu, fes, error_offset); 2547 return -1; 2548 } 2549 2550 static inline enum spdk_nvme_media_error_status_code 2551 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) { 2552 enum spdk_nvme_media_error_status_code result; 2553 2554 switch (err_type) 2555 { 2556 case SPDK_DIF_REFTAG_ERROR: 2557 result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR; 2558 break; 2559 case SPDK_DIF_APPTAG_ERROR: 2560 result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR; 2561 break; 2562 case SPDK_DIF_GUARD_ERROR: 2563 result = SPDK_NVME_SC_GUARD_CHECK_ERROR; 2564 break; 2565 default: 2566 SPDK_UNREACHABLE(); 2567 break; 2568 } 2569 2570 return result; 2571 } 2572 2573 static void 2574 _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2575 struct spdk_nvmf_tcp_req *tcp_req) 2576 { 2577 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF( 2578 tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 2579 struct nvme_tcp_pdu *rsp_pdu; 2580 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 2581 uint32_t plen, pdo, alignment; 2582 int rc; 2583 2584 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2585 2586 rsp_pdu = tcp_req->pdu; 2587 assert(rsp_pdu != NULL); 2588 2589 c2h_data = &rsp_pdu->hdr.c2h_data; 2590 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; 2591 plen = c2h_data->common.hlen = sizeof(*c2h_data); 2592 2593 if (tqpair->host_hdgst_enable) { 2594 plen += SPDK_NVME_TCP_DIGEST_LEN; 2595 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2596 } 2597 2598 /* set the psh */ 2599 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2600 c2h_data->datal = tcp_req->req.length - tcp_req->pdu->rw_offset; 2601 c2h_data->datao = tcp_req->pdu->rw_offset; 2602 2603 /* set the padding */ 2604 rsp_pdu->padding_len = 0; 2605 pdo = plen; 2606 if (tqpair->cpda) { 2607 alignment = (tqpair->cpda + 1) << 2; 2608 if (plen % alignment != 0) { 2609 pdo = (plen + alignment) / alignment * alignment; 2610 rsp_pdu->padding_len = pdo - plen; 2611 plen = pdo; 2612 } 2613 } 2614 2615 c2h_data->common.pdo = pdo; 2616 plen += c2h_data->datal; 2617 if (tqpair->host_ddgst_enable) { 2618 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 2619 plen += SPDK_NVME_TCP_DIGEST_LEN; 2620 } 2621 2622 c2h_data->common.plen = plen; 2623 2624 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2625 rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 2626 } 2627 2628 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2629 c2h_data->datao, c2h_data->datal); 2630 2631 2632 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 2633 /* Need to send the capsule response if response is not all 0 */ 2634 if (ttransport->tcp_opts.c2h_success && 2635 tcp_req->rsp.cdw0 == 0 && tcp_req->rsp.cdw1 == 0) { 2636 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 2637 } 2638 2639 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2640 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; 2641 struct spdk_dif_error err_blk = {}; 2642 uint32_t mapped_length = 0; 2643 uint32_t available_iovs = SPDK_COUNTOF(rsp_pdu->iov); 2644 uint32_t ddgst_len = 0; 2645 2646 if (tqpair->host_ddgst_enable) { 2647 /* Data digest consumes additional iov entry */ 2648 
available_iovs--; 2649 /* plen needs to be updated since nvme_tcp_build_iovs compares expected and actual plen */ 2650 ddgst_len = SPDK_NVME_TCP_DIGEST_LEN; 2651 c2h_data->common.plen -= ddgst_len; 2652 } 2653 /* Temp call to estimate if data can be described by limited number of iovs. 2654 * iov vector will be rebuilt in nvmf_tcp_qpair_write_pdu */ 2655 nvme_tcp_build_iovs(rsp_pdu->iov, available_iovs, rsp_pdu, tqpair->host_hdgst_enable, 2656 false, &mapped_length); 2657 2658 if (mapped_length != c2h_data->common.plen) { 2659 c2h_data->datal = mapped_length - (c2h_data->common.plen - c2h_data->datal); 2660 SPDK_DEBUGLOG(nvmf_tcp, 2661 "Part C2H, data_len %u (of %u), PDU len %u, updated PDU len %u, offset %u\n", 2662 c2h_data->datal, tcp_req->req.length, c2h_data->common.plen, mapped_length, rsp_pdu->rw_offset); 2663 c2h_data->common.plen = mapped_length; 2664 2665 /* Rebuild pdu->data_iov since data length is changed */ 2666 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, c2h_data->datao, 2667 c2h_data->datal); 2668 2669 c2h_data->common.flags &= ~(SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU | 2670 SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS); 2671 } 2672 2673 c2h_data->common.plen += ddgst_len; 2674 2675 assert(rsp_pdu->rw_offset <= tcp_req->req.length); 2676 2677 rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt, 2678 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk); 2679 if (rc != 0) { 2680 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2681 err_blk.err_type, err_blk.err_offset); 2682 rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR; 2683 rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type); 2684 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2685 return; 2686 } 2687 } 2688 2689 rsp_pdu->rw_offset += c2h_data->datal; 2690 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_pdu_c2h_data_complete, tcp_req); 2691 } 2692 2693 static void 2694 nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2695 struct spdk_nvmf_tcp_req *tcp_req) 2696 { 2697 nvmf_tcp_req_pdu_init(tcp_req); 2698 _nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2699 } 2700 2701 static int 2702 request_transfer_out(struct spdk_nvmf_request *req) 2703 { 2704 struct spdk_nvmf_tcp_req *tcp_req; 2705 struct spdk_nvmf_qpair *qpair; 2706 struct spdk_nvmf_tcp_qpair *tqpair; 2707 struct spdk_nvme_cpl *rsp; 2708 2709 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2710 2711 qpair = req->qpair; 2712 rsp = &req->rsp->nvme_cpl; 2713 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2714 2715 /* Advance our sq_head pointer */ 2716 if (qpair->sq_head == qpair->sq_head_max) { 2717 qpair->sq_head = 0; 2718 } else { 2719 qpair->sq_head++; 2720 } 2721 rsp->sqhd = qpair->sq_head; 2722 2723 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2724 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 2725 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 2726 nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2727 } else { 2728 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2729 } 2730 2731 return 0; 2732 } 2733 2734 static void 2735 nvmf_tcp_check_fused_ordering(struct spdk_nvmf_tcp_transport *ttransport, 2736 struct spdk_nvmf_tcp_qpair *tqpair, 2737 struct spdk_nvmf_tcp_req *tcp_req) 2738 { 2739 enum spdk_nvme_cmd_fuse last, next; 2740 2741 last = tqpair->fused_first ? 
tqpair->fused_first->cmd.fuse : SPDK_NVME_CMD_FUSE_NONE; 2742 next = tcp_req->cmd.fuse; 2743 2744 assert(last != SPDK_NVME_CMD_FUSE_SECOND); 2745 2746 if (spdk_likely(last == SPDK_NVME_CMD_FUSE_NONE && next == SPDK_NVME_CMD_FUSE_NONE)) { 2747 return; 2748 } 2749 2750 if (last == SPDK_NVME_CMD_FUSE_FIRST) { 2751 if (next == SPDK_NVME_CMD_FUSE_SECOND) { 2752 /* This is a valid pair of fused commands. Point them at each other 2753 * so they can be submitted consecutively once ready to be executed. 2754 */ 2755 tqpair->fused_first->fused_pair = tcp_req; 2756 tcp_req->fused_pair = tqpair->fused_first; 2757 tqpair->fused_first = NULL; 2758 return; 2759 } else { 2760 /* Mark the last req as failed since it wasn't followed by a SECOND. */ 2761 tqpair->fused_first->fused_failed = true; 2762 2763 /* 2764 * If the last req is in READY_TO_EXECUTE state, then call 2765 * nvmf_tcp_req_process(), otherwise nothing else will kick it. 2766 */ 2767 if (tqpair->fused_first->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) { 2768 nvmf_tcp_req_process(ttransport, tqpair->fused_first); 2769 } 2770 2771 tqpair->fused_first = NULL; 2772 } 2773 } 2774 2775 if (next == SPDK_NVME_CMD_FUSE_FIRST) { 2776 /* Set tqpair->fused_first here so that we know to check that the next request 2777 * is a SECOND (and to fail this one if it isn't). 2778 */ 2779 tqpair->fused_first = tcp_req; 2780 } else if (next == SPDK_NVME_CMD_FUSE_SECOND) { 2781 /* Mark this req failed since it is a SECOND and the last one was not a FIRST. */ 2782 tcp_req->fused_failed = true; 2783 } 2784 } 2785 2786 static bool 2787 nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 2788 struct spdk_nvmf_tcp_req *tcp_req) 2789 { 2790 struct spdk_nvmf_tcp_qpair *tqpair; 2791 uint32_t plen; 2792 struct nvme_tcp_pdu *pdu; 2793 enum spdk_nvmf_tcp_req_state prev_state; 2794 bool progress = false; 2795 struct spdk_nvmf_transport *transport = &ttransport->transport; 2796 struct spdk_nvmf_transport_poll_group *group; 2797 struct spdk_nvmf_tcp_poll_group *tgroup; 2798 2799 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2800 group = &tqpair->group->group; 2801 assert(tcp_req->state != TCP_REQUEST_STATE_FREE); 2802 2803 /* If the qpair is not active, we need to abort the outstanding requests. */ 2804 if (tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { 2805 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { 2806 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2807 } 2808 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 2809 } 2810 2811 /* The loop here is to allow for several back-to-back state changes. */ 2812 do { 2813 prev_state = tcp_req->state; 2814 2815 SPDK_DEBUGLOG(nvmf_tcp, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, 2816 tqpair); 2817 2818 switch (tcp_req->state) { 2819 case TCP_REQUEST_STATE_FREE: 2820 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW 2821 * to escape this state. 
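 * In this transport that kick happens when a capsule command header arrives
 * and a free request is taken from the qpair's pool (see
 * nvmf_tcp_capsule_cmd_hdr_handle()).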
*/ 2822 break; 2823 case TCP_REQUEST_STATE_NEW: 2824 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, tqpair); 2825 2826 /* copy the cmd from the receive pdu */ 2827 tcp_req->cmd = tqpair->pdu_in_progress->hdr.capsule_cmd.ccsqe; 2828 2829 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) { 2830 tcp_req->req.dif_enabled = true; 2831 tqpair->pdu_in_progress->dif_ctx = &tcp_req->req.dif.dif_ctx; 2832 } 2833 2834 nvmf_tcp_check_fused_ordering(ttransport, tqpair, tcp_req); 2835 2836 /* The next state transition depends on the data transfer needs of this request. */ 2837 tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req); 2838 2839 if (spdk_unlikely(tcp_req->req.xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) { 2840 tcp_req->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2841 tcp_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; 2842 tcp_req->req.rsp->nvme_cpl.cid = tcp_req->req.cmd->nvme_cmd.cid; 2843 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2844 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2845 SPDK_DEBUGLOG(nvmf_tcp, "Request %p: invalid xfer type (BIDIRECTIONAL)\n", tcp_req); 2846 break; 2847 } 2848 2849 /* If no data to transfer, ready to execute. */ 2850 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { 2851 /* Reset the tqpair receiving pdu state */ 2852 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2853 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2854 break; 2855 } 2856 2857 pdu = tqpair->pdu_in_progress; 2858 plen = pdu->hdr.common.hlen; 2859 if (tqpair->host_hdgst_enable) { 2860 plen += SPDK_NVME_TCP_DIGEST_LEN; 2861 } 2862 if (pdu->hdr.common.plen != plen) { 2863 tcp_req->has_in_capsule_data = true; 2864 } else { 2865 /* Data is transmitted by C2H PDUs */ 2866 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2867 } 2868 2869 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); 2870 STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link); 2871 break; 2872 case TCP_REQUEST_STATE_NEED_BUFFER: 2873 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2874 tqpair); 2875 2876 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); 2877 2878 if (!tcp_req->has_in_capsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) { 2879 SPDK_DEBUGLOG(nvmf_tcp, 2880 "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n", 2881 tcp_req, tqpair); 2882 /* This request needs to wait in line to obtain a buffer */ 2883 break; 2884 } 2885 2886 /* Try to get a data buffer */ 2887 if (nvmf_tcp_req_parse_sgl(tcp_req, transport, group) < 0) { 2888 break; 2889 } 2890 2891 /* Get a zcopy buffer if the request can be serviced through zcopy */ 2892 if (spdk_nvmf_request_using_zcopy(&tcp_req->req)) { 2893 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2894 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 2895 tcp_req->req.length = tcp_req->req.dif.elba_length; 2896 } 2897 2898 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2899 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_START); 2900 spdk_nvmf_request_zcopy_start(&tcp_req->req); 2901 break; 2902 } 2903 2904 if (tcp_req->req.iovcnt < 1) { 2905 SPDK_DEBUGLOG(nvmf_tcp, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)", 2906 tcp_req, tqpair); 2907 /* 
No buffers available. */ 2908 break; 2909 } 2910 2911 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2912 2913 /* If data is to be transferred from the host to the controller, we need to fetch it from the host. */ 2914 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2915 if (tcp_req->req.data_from_pool) { 2916 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2917 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2918 } else { 2919 struct nvme_tcp_pdu *pdu; 2920 2921 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2922 2923 pdu = tqpair->pdu_in_progress; 2924 SPDK_DEBUGLOG(nvmf_tcp, "No need to send R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, 2925 tqpair); 2926 /* No need to send an R2T; the data is contained in the capsule */ 2927 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2928 0, tcp_req->req.length); 2929 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2930 } 2931 break; 2932 } 2933 2934 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2935 break; 2936 case TCP_REQUEST_STATE_AWAITING_ZCOPY_START: 2937 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START, tqpair->qpair.qid, 0, 2938 (uintptr_t)tcp_req, tqpair); 2939 /* Some external code must kick a request into TCP_REQUEST_STATE_ZCOPY_START_COMPLETED 2940 * to escape this state. */ 2941 break; 2942 case TCP_REQUEST_STATE_ZCOPY_START_COMPLETED: 2943 spdk_trace_record(TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED, tqpair->qpair.qid, 0, 2944 (uintptr_t)tcp_req, tqpair); 2945 if (spdk_unlikely(spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl))) { 2946 SPDK_DEBUGLOG(nvmf_tcp, "Zero-copy start failed for tcp_req(%p) on tqpair=%p\n", 2947 tcp_req, tqpair); 2948 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2949 break; 2950 } 2951 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2952 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2953 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2954 } else { 2955 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 2956 } 2957 break; 2958 case TCP_REQUEST_STATE_AWAITING_R2T_ACK: 2959 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2960 tqpair); 2961 /* The R2T send completion or incoming H2C data will kick it out of this state. */ 2962 break; 2963 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 2964 2965 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, tqpair->qpair.qid, 0, 2966 (uintptr_t)tcp_req, tqpair); 2967 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE 2968 * to escape this state. */ 2969 break; 2970 case TCP_REQUEST_STATE_READY_TO_EXECUTE: 2971 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, tqpair->qpair.qid, 0, 2972 (uintptr_t)tcp_req, tqpair); 2973 2974 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2975 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 2976 tcp_req->req.length = tcp_req->req.dif.elba_length; 2977 } 2978 2979 if (tcp_req->cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) { 2980 if (tcp_req->fused_failed) { 2981 /* This request failed FUSED semantics. Fail it immediately, without 2982 * even sending it to the target layer.
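 * The fused_failed flag is set by nvmf_tcp_check_fused_ordering(), e.g. for a
 * FUSED_SECOND command that did not follow a FUSED_FIRST, or a FUSED_FIRST
 * that was not followed by a FUSED_SECOND.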
2983 */ 2984 tcp_req->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2985 tcp_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED; 2986 tcp_req->req.rsp->nvme_cpl.cid = tcp_req->req.cmd->nvme_cmd.cid; 2987 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2988 break; 2989 } 2990 2991 if (tcp_req->fused_pair == NULL || 2992 tcp_req->fused_pair->state != TCP_REQUEST_STATE_READY_TO_EXECUTE) { 2993 /* This request is ready to execute, but either we don't know yet if it's 2994 * valid - i.e. this is a FIRST but we haven't received the next request yet), 2995 * or the other request of this fused pair isn't ready to execute. So 2996 * break here and this request will get processed later either when the 2997 * other request is ready or we find that this request isn't valid. 2998 */ 2999 break; 3000 } 3001 } 3002 3003 if (!spdk_nvmf_request_using_zcopy(&tcp_req->req)) { 3004 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING); 3005 /* If we get to this point, and this request is a fused command, we know that 3006 * it is part of a valid sequence (FIRST followed by a SECOND) and that both 3007 * requests are READY_TO_EXECUTE. So call spdk_nvmf_request_exec() both on this 3008 * request, and the other request of the fused pair, in the correct order. 3009 * Also clear the ->fused_pair pointers on both requests, since after this point 3010 * we no longer need to maintain the relationship between these two requests. 3011 */ 3012 if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_SECOND) { 3013 assert(tcp_req->fused_pair != NULL); 3014 assert(tcp_req->fused_pair->fused_pair == tcp_req); 3015 nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING); 3016 spdk_nvmf_request_exec(&tcp_req->fused_pair->req); 3017 tcp_req->fused_pair->fused_pair = NULL; 3018 tcp_req->fused_pair = NULL; 3019 } 3020 spdk_nvmf_request_exec(&tcp_req->req); 3021 if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_FIRST) { 3022 assert(tcp_req->fused_pair != NULL); 3023 assert(tcp_req->fused_pair->fused_pair == tcp_req); 3024 nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING); 3025 spdk_nvmf_request_exec(&tcp_req->fused_pair->req); 3026 tcp_req->fused_pair->fused_pair = NULL; 3027 tcp_req->fused_pair = NULL; 3028 } 3029 } else { 3030 /* For zero-copy, only requests with data coming from host to the 3031 * controller can end up here. */ 3032 assert(tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER); 3033 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT); 3034 spdk_nvmf_request_zcopy_end(&tcp_req->req, true); 3035 } 3036 3037 break; 3038 case TCP_REQUEST_STATE_EXECUTING: 3039 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 3040 tqpair); 3041 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 3042 * to escape this state. */ 3043 break; 3044 case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT: 3045 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT, tqpair->qpair.qid, 0, 3046 (uintptr_t)tcp_req, tqpair); 3047 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 3048 * to escape this state. 
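 * That kick comes from nvmf_tcp_req_complete() once
 * spdk_nvmf_request_zcopy_end() finishes committing the zcopy buffers.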
*/ 3049 break; 3050 case TCP_REQUEST_STATE_EXECUTED: 3051 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 3052 tqpair); 3053 3054 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 3055 tcp_req->req.length = tcp_req->req.dif.orig_length; 3056 } 3057 3058 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 3059 break; 3060 case TCP_REQUEST_STATE_READY_TO_COMPLETE: 3061 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, tqpair->qpair.qid, 0, 3062 (uintptr_t)tcp_req, tqpair); 3063 if (request_transfer_out(&tcp_req->req) != 0) { 3064 assert(0); /* No good way to handle this currently */ 3065 } 3066 break; 3067 case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: 3068 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, tqpair->qpair.qid, 0, 3069 (uintptr_t)tcp_req, tqpair); 3070 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 3071 * to escape this state. */ 3072 break; 3073 case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE: 3074 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE, tqpair->qpair.qid, 0, 3075 (uintptr_t)tcp_req, tqpair); 3076 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 3077 * to escape this state. */ 3078 break; 3079 case TCP_REQUEST_STATE_COMPLETED: 3080 spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 3081 tqpair); 3082 /* If there's an outstanding PDU sent to the host, the request is completed 3083 * due to the qpair being disconnected. We must delay the completion until 3084 * that write is done to avoid freeing the request twice. */ 3085 if (spdk_unlikely(tcp_req->pdu_in_use)) { 3086 SPDK_DEBUGLOG(nvmf_tcp, "Delaying completion due to outstanding " 3087 "write on req=%p\n", tcp_req); 3088 /* This can only happen for zcopy requests */ 3089 assert(spdk_nvmf_request_using_zcopy(&tcp_req->req)); 3090 assert(tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE); 3091 break; 3092 } 3093 3094 if (tcp_req->req.data_from_pool) { 3095 spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport); 3096 } else if (spdk_unlikely(tcp_req->has_in_capsule_data && 3097 (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC || 3098 tqpair->qpair.qid == 0) && tcp_req->req.length > transport->opts.in_capsule_data_size)) { 3099 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 3100 assert(tgroup->control_msg_list); 3101 SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n"); 3102 nvmf_tcp_control_msg_put(tgroup->control_msg_list, 3103 tcp_req->req.iov[0].iov_base); 3104 } else if (tcp_req->req.zcopy_bdev_io != NULL) { 3105 /* If the request has an unreleased zcopy bdev_io, it's either a 3106 * read, a failed write, or the qpair is being disconnected */ 3107 assert(spdk_nvmf_request_using_zcopy(&tcp_req->req)); 3108 assert(tcp_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST || 3109 spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl) || 3110 tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE); 3111 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE); 3112 spdk_nvmf_request_zcopy_end(&tcp_req->req, false); 3113 break; 3114 } 3115 tcp_req->req.length = 0; 3116 tcp_req->req.iovcnt = 0; 3117 tcp_req->req.data = NULL; 3118 tcp_req->fused_failed = false; 3119 if (tcp_req->fused_pair) { 3120 /* This req was part of a valid fused pair, but failed before it got to 3121 * READ_TO_EXECUTE state. 
This means we need to fail the other request 3122 * in the pair, because it is no longer part of a valid pair. If the pair 3123 * already reached READY_TO_EXECUTE state, we need to kick it. 3124 */ 3125 tcp_req->fused_pair->fused_failed = true; 3126 if (tcp_req->fused_pair->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) { 3127 nvmf_tcp_req_process(ttransport, tcp_req->fused_pair); 3128 } 3129 tcp_req->fused_pair = NULL; 3130 } 3131 3132 nvmf_tcp_req_put(tqpair, tcp_req); 3133 break; 3134 case TCP_REQUEST_NUM_STATES: 3135 default: 3136 assert(0); 3137 break; 3138 } 3139 3140 if (tcp_req->state != prev_state) { 3141 progress = true; 3142 } 3143 } while (tcp_req->state != prev_state); 3144 3145 return progress; 3146 } 3147 3148 static void 3149 nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) 3150 { 3151 struct spdk_nvmf_tcp_qpair *tqpair = arg; 3152 int rc; 3153 3154 assert(tqpair != NULL); 3155 rc = nvmf_tcp_sock_process(tqpair); 3156 3157 /* If there was a new socket error, disconnect */ 3158 if (rc < 0) { 3159 nvmf_tcp_qpair_disconnect(tqpair); 3160 } 3161 } 3162 3163 static int 3164 nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 3165 struct spdk_nvmf_qpair *qpair) 3166 { 3167 struct spdk_nvmf_tcp_poll_group *tgroup; 3168 struct spdk_nvmf_tcp_qpair *tqpair; 3169 int rc; 3170 3171 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 3172 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3173 3174 rc = nvmf_tcp_qpair_sock_init(tqpair); 3175 if (rc != 0) { 3176 SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair); 3177 return -1; 3178 } 3179 3180 rc = nvmf_tcp_qpair_init(&tqpair->qpair); 3181 if (rc < 0) { 3182 SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair); 3183 return -1; 3184 } 3185 3186 rc = nvmf_tcp_qpair_init_mem_resource(tqpair); 3187 if (rc < 0) { 3188 SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair); 3189 return -1; 3190 } 3191 3192 rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock, 3193 nvmf_tcp_sock_cb, tqpair); 3194 if (rc != 0) { 3195 SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n", 3196 spdk_strerror(errno), errno); 3197 return -1; 3198 } 3199 3200 tqpair->group = tgroup; 3201 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_INVALID); 3202 TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link); 3203 3204 return 0; 3205 } 3206 3207 static int 3208 nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 3209 struct spdk_nvmf_qpair *qpair) 3210 { 3211 struct spdk_nvmf_tcp_poll_group *tgroup; 3212 struct spdk_nvmf_tcp_qpair *tqpair; 3213 int rc; 3214 3215 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 3216 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3217 3218 assert(tqpair->group == tgroup); 3219 3220 SPDK_DEBUGLOG(nvmf_tcp, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup); 3221 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 3222 TAILQ_REMOVE(&tgroup->await_req, tqpair, link); 3223 } else { 3224 TAILQ_REMOVE(&tgroup->qpairs, tqpair, link); 3225 } 3226 3227 rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock); 3228 if (rc != 0) { 3229 SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n", 3230 spdk_strerror(errno), errno); 3231 } 3232 3233 return rc; 3234 } 3235 3236 static int 3237 nvmf_tcp_req_complete(struct spdk_nvmf_request *req) 3238 { 3239 struct spdk_nvmf_tcp_transport *ttransport; 3240 struct spdk_nvmf_tcp_req 
static int
nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_req *tcp_req;

	ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
	tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);

	switch (tcp_req->state) {
	case TCP_REQUEST_STATE_EXECUTING:
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
		break;
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_ZCOPY_START_COMPLETED);
		break;
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE:
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
		break;
	default:
		assert(0 && "Unexpected request state");
		break;
	}

	nvmf_tcp_req_process(ttransport, tcp_req);

	return 0;
}

static void
nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair,
		     spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
{
	struct spdk_nvmf_tcp_qpair *tqpair;

	SPDK_DEBUGLOG(nvmf_tcp, "Qpair: %p\n", qpair);

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	assert(tqpair->fini_cb_fn == NULL);
	tqpair->fini_cb_fn = cb_fn;
	tqpair->fini_cb_arg = cb_arg;

	nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_EXITED);
	nvmf_tcp_qpair_destroy(tqpair);
}

static int
nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct spdk_nvmf_tcp_poll_group *tgroup;
	int rc;
	struct spdk_nvmf_request *req, *req_tmp;
	struct spdk_nvmf_tcp_req *tcp_req;
	struct spdk_nvmf_tcp_qpair *tqpair, *tqpair_tmp;
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
			struct spdk_nvmf_tcp_transport, transport);

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);

	if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs) && TAILQ_EMPTY(&tgroup->await_req))) {
		return 0;
	}

	STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
		tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
		if (nvmf_tcp_req_process(ttransport, tcp_req) == false) {
			break;
		}
	}

	rc = spdk_sock_group_poll(tgroup->sock_group);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
	}

	TAILQ_FOREACH_SAFE(tqpair, &tgroup->await_req, link, tqpair_tmp) {
		rc = nvmf_tcp_sock_process(tqpair);

		/* If there was a new socket error, disconnect */
		if (rc < 0) {
			nvmf_tcp_qpair_disconnect(tqpair);
		}
	}

	return rc;
}
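
/* Fill out a transport ID describing this connection. When 'peer' is true the
 * initiator (host) address and port are reported; otherwise the local target
 * side is used. The address family is taken from the underlying socket. */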
static int
nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
			struct spdk_nvme_transport_id *trid, bool peer)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	uint16_t port;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
	spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP);

	if (peer) {
		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
		port = tqpair->initiator_port;
	} else {
		snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
		port = tqpair->target_port;
	}

	if (spdk_sock_is_ipv4(tqpair->sock)) {
		trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (spdk_sock_is_ipv6(tqpair->sock)) {
		trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else {
		return -1;
	}

	snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
	return 0;
}

static int
nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
			      struct spdk_nvme_transport_id *trid)
{
	return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
}

static int
nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
			     struct spdk_nvme_transport_id *trid)
{
	return nvmf_tcp_qpair_get_trid(qpair, trid, 1);
}

static int
nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
			       struct spdk_nvme_transport_id *trid)
{
	return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
}

static void
nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req,
			      struct spdk_nvmf_tcp_req *tcp_req_to_abort)
{
	tcp_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	tcp_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
	tcp_req_to_abort->req.rsp->nvme_cpl.cid = tcp_req_to_abort->req.cmd->nvme_cmd.cid;

	nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE);

	req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
}
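
/* Poller-driven abort handler. Per the NVMe specification, bit 0 of the Abort
 * command's completion dword 0 is cleared when the target command was aborted and
 * set when it was not; nvmf_tcp_req_set_abort_status() above clears that bit on
 * success. For requests still waiting on an R2T ack or an H2C data transfer, the
 * abort attempt is retried via a poller until transport->opts.abort_timeout_sec
 * expires. */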
static int
_nvmf_tcp_qpair_abort_request(void *ctx)
{
	struct spdk_nvmf_request *req = ctx;
	struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort,
			struct spdk_nvmf_tcp_req, req);
	struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
			struct spdk_nvmf_tcp_qpair, qpair);
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
			struct spdk_nvmf_tcp_transport, transport);
	int rc;

	spdk_poller_unregister(&req->poller);

	switch (tcp_req_to_abort->state) {
	case TCP_REQUEST_STATE_EXECUTING:
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_START:
	case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT:
		rc = nvmf_ctrlr_abort_request(req);
		if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
			return SPDK_POLLER_BUSY;
		}
		break;

	case TCP_REQUEST_STATE_NEED_BUFFER:
		STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue,
			      &tcp_req_to_abort->req, spdk_nvmf_request, buf_link);

		nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
		nvmf_tcp_req_process(ttransport, tcp_req_to_abort);
		break;

	case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
	case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
		if (spdk_get_ticks() < req->timeout_tsc) {
			req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0);
			return SPDK_POLLER_BUSY;
		}
		break;

	default:
		/* Requests in other states are either un-abortable (e.g.
		 * TRANSFERRING_CONTROLLER_TO_HOST) or should never end up here, as they're
		 * immediately transitioned to other states in nvmf_tcp_req_process() (e.g.
		 * READY_TO_EXECUTE). But it is fine to end up here, as we'll simply complete
		 * the abort request with bit 0 of dword 0 set (command not aborted).
		 */
		break;
	}

	spdk_nvmf_request_complete(req);
	return SPDK_POLLER_BUSY;
}

static void
nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
			     struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_transport *transport;
	uint16_t cid;
	uint32_t i;
	struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
	ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);
	transport = &ttransport->transport;

	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;

	for (i = 0; i < tqpair->resource_count; i++) {
		if (tqpair->reqs[i].state != TCP_REQUEST_STATE_FREE &&
		    tqpair->reqs[i].req.cmd->nvme_cmd.cid == cid) {
			tcp_req_to_abort = &tqpair->reqs[i];
			break;
		}
	}

	spdk_trace_record(TRACE_TCP_QP_ABORT_REQ, qpair->qid, 0, (uintptr_t)req, tqpair);

	if (tcp_req_to_abort == NULL) {
		spdk_nvmf_request_complete(req);
		return;
	}

	req->req_to_abort = &tcp_req_to_abort->req;
	req->timeout_tsc = spdk_get_ticks() +
			   transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
	req->poller = NULL;

	_nvmf_tcp_qpair_abort_request(req);
}

struct tcp_subsystem_add_host_opts {
	char *psk;
};

static const struct spdk_json_object_decoder tcp_subsystem_add_host_opts_decoder[] = {
	{"psk", offsetof(struct tcp_subsystem_add_host_opts, psk), spdk_json_decode_string, true},
};
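
/* Per-host transport-specific parameters are decoded with the table above;
 * currently only a "psk" member is recognized. For illustration only (the exact
 * RPC wiring and PSK encoding depend on the SPDK version in use), the JSON
 * carried in 'transport_specific' is expected to look roughly like:
 *
 *   { "psk": "<pre-shared key from which the retained TLS PSK is derived>" }
 */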
static int
nvmf_tcp_subsystem_add_host(struct spdk_nvmf_transport *transport,
			    const struct spdk_nvmf_subsystem *subsystem,
			    const char *hostnqn,
			    const struct spdk_json_val *transport_specific)
{
	struct tcp_subsystem_add_host_opts opts;
	struct spdk_nvmf_tcp_transport *ttransport;
	struct tcp_psk_entry *entry;
	char psk_identity[NVMF_PSK_IDENTITY_LEN];
	uint64_t key_len;
	int rc = 0;

	if (transport_specific == NULL) {
		return 0;
	}

	assert(transport != NULL);
	assert(subsystem != NULL);

	memset(&opts, 0, sizeof(opts));

	/* Decode PSK */
	if (spdk_json_decode_object_relaxed(transport_specific, tcp_subsystem_add_host_opts_decoder,
					    SPDK_COUNTOF(tcp_subsystem_add_host_opts_decoder), &opts)) {
		SPDK_ERRLOG("spdk_json_decode_object failed\n");
		return -EINVAL;
	}

	if (opts.psk == NULL) {
		return 0;
	}

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	/* Generate PSK identity. */
	rc = nvme_tcp_generate_psk_identity(psk_identity, NVMF_PSK_IDENTITY_LEN, hostnqn,
					    subsystem->subnqn);
	if (rc) {
		rc = -EINVAL;
		goto end;
	}
	/* Check if PSK identity entry already exists. */
	TAILQ_FOREACH(entry, &ttransport->psks, link) {
		if (strncmp(entry->psk_identity, psk_identity, NVMF_PSK_IDENTITY_LEN) == 0) {
			SPDK_ERRLOG("PSK identity entry already exists: %s\n", psk_identity);
			rc = -EEXIST;
			goto end;
		}
	}
	entry = calloc(1, sizeof(struct tcp_psk_entry));
	if (entry == NULL) {
		SPDK_ERRLOG("Unable to allocate memory for PSK entry!\n");
		rc = -ENOMEM;
		goto end;
	}

	if (snprintf(entry->hostnqn, sizeof(entry->hostnqn), "%s", hostnqn) < 0) {
		SPDK_ERRLOG("Could not write hostnqn string!\n");
		rc = -EINVAL;
		free(entry);
		goto end;
	}
	if (snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn) < 0) {
		SPDK_ERRLOG("Could not write subnqn string!\n");
		rc = -EINVAL;
		free(entry);
		goto end;
	}
	if (snprintf(entry->psk_identity, sizeof(entry->psk_identity), "%s", psk_identity) < 0) {
		SPDK_ERRLOG("Could not write PSK identity string!\n");
		rc = -EINVAL;
		free(entry);
		goto end;
	}
	if (strlen(opts.psk) >= sizeof(entry->psk)) {
		SPDK_ERRLOG("PSK of length %zu cannot fit in max buffer size %zu\n", strlen(opts.psk),
			    sizeof(entry->psk));
		rc = -EINVAL;
		free(entry);
		goto end;
	}

	/* Derive retained PSK. */
	rc = nvme_tcp_derive_retained_psk(opts.psk, hostnqn, entry->psk, SPDK_TLS_PSK_MAX_LEN);
	if (rc < 0) {
		SPDK_ERRLOG("Unable to derive retained PSK!\n");
		goto end;
	}
	entry->psk_size = rc;

	TAILQ_INSERT_TAIL(&ttransport->psks, entry, link);
	rc = 0;

end:
	key_len = strnlen(opts.psk, SPDK_TLS_PSK_MAX_LEN);
	spdk_memset_s(opts.psk, key_len, 0, key_len);
	free(opts.psk);

	return rc;
}

static void
nvmf_tcp_subsystem_remove_host(struct spdk_nvmf_transport *transport,
			       const struct spdk_nvmf_subsystem *subsystem,
			       const char *hostnqn)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	struct tcp_psk_entry *entry, *tmp;

	assert(transport != NULL);
	assert(subsystem != NULL);

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
	TAILQ_FOREACH_SAFE(entry, &ttransport->psks, link, tmp) {
		if ((strncmp(entry->hostnqn, hostnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0 &&
		    (strncmp(entry->subnqn, subsystem->subnqn, SPDK_NVMF_NQN_MAX_LEN)) == 0) {
			TAILQ_REMOVE(&ttransport->psks, entry, link);
			spdk_memset_s(entry->psk, sizeof(entry->psk), 0, sizeof(entry->psk));
			free(entry);
			break;
		}
	}
}

static void
nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
	opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
	opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
	opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
	opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
	opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC;
	opts->transport_specific = NULL;
}

const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.opts_init = nvmf_tcp_opts_init,
	.create = nvmf_tcp_create,
	.dump_opts = nvmf_tcp_dump_opts,
	.destroy = nvmf_tcp_destroy,

	.listen = nvmf_tcp_listen,
	.stop_listen = nvmf_tcp_stop_listen,

	.listener_discover = nvmf_tcp_discover,

	.poll_group_create = nvmf_tcp_poll_group_create,
	.get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group,
	.poll_group_destroy = nvmf_tcp_poll_group_destroy,
	.poll_group_add = nvmf_tcp_poll_group_add,
	.poll_group_remove = nvmf_tcp_poll_group_remove,
	.poll_group_poll = nvmf_tcp_poll_group_poll,

	.req_free = nvmf_tcp_req_free,
	.req_complete = nvmf_tcp_req_complete,

	.qpair_fini = nvmf_tcp_close_qpair,
	.qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid,
	.qpair_abort_request = nvmf_tcp_qpair_abort_request,
	.subsystem_add_host = nvmf_tcp_subsystem_add_host,
	.subsystem_remove_host = nvmf_tcp_subsystem_remove_host,
};

SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp);
SPDK_LOG_REGISTER_COMPONENT(nvmf_tcp)
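
/*
 * Usage sketch (not part of the transport itself): with this transport compiled
 * in and registered above, a target application typically enables it over the
 * JSON-RPC interface, for example along these lines (flag names and the example
 * NQN are illustrative and may vary between SPDK releases):
 *
 *   scripts/rpc.py nvmf_create_transport -t TCP
 *   scripts/rpc.py nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode1 \
 *       -t tcp -a 127.0.0.1 -s 4420
 *
 * assuming the subsystem has already been created; the calls ultimately resolve
 * to the spdk_nvmf_transport_tcp ops table registered by
 * SPDK_NVMF_TRANSPORT_REGISTER() above.
 */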