/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/accel.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/endian.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/nvmf_transport.h"
#include "spdk/string.h"
#include "spdk/trace.h"
#include "spdk/util.h"
#include "spdk/log.h"

#include "spdk_internal/assert.h"
#include "spdk_internal/nvme_tcp.h"
#include "spdk_internal/sock.h"

#include "nvmf_internal.h"

#include "spdk_internal/trace_defs.h"

#define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
#define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 16
#define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
#define SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM 32
#define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true

#define SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH 2
#define SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH 65535
#define SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH 2
#define SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH 4096

#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH 128
#define SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH 128
#define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
#define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
#define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
#define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
#define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
#define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
#define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1

const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp;

/* spdk nvmf related structure */
enum spdk_nvmf_tcp_req_state {

	/* The request is not currently in use */
	TCP_REQUEST_STATE_FREE = 0,

	/* Initial state when request first received */
	TCP_REQUEST_STATE_NEW = 1,

	/* The request is queued until a data buffer is available. */
	TCP_REQUEST_STATE_NEED_BUFFER = 2,

	/* The request is waiting for zcopy_start to finish */
	TCP_REQUEST_STATE_AWAITING_ZCOPY_START = 3,

	/* The request has received a zero-copy buffer */
	TCP_REQUEST_STATE_ZCOPY_START_COMPLETED = 4,

	/* The request is currently transferring data from the host to the controller. */
	TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER = 5,

	/* The request is waiting for the R2T send acknowledgement. */
	TCP_REQUEST_STATE_AWAITING_R2T_ACK = 6,

	/* The request is ready to execute at the block device */
	TCP_REQUEST_STATE_READY_TO_EXECUTE = 7,

	/* The request is currently executing at the block device */
	TCP_REQUEST_STATE_EXECUTING = 8,

	/* The request is waiting for zcopy buffers to be committed */
	TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT = 9,

	/* The request finished executing at the block device */
	TCP_REQUEST_STATE_EXECUTED = 10,

	/* The request is ready to send a completion */
	TCP_REQUEST_STATE_READY_TO_COMPLETE = 11,

	/* The request is currently transferring final pdus from the controller to the host. */
	TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST = 12,

	/* The request is waiting for zcopy buffers to be released (without committing) */
	TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE = 13,

	/* The request completed and can be marked free. */
	TCP_REQUEST_STATE_COMPLETED = 14,

	/* Terminator */
	TCP_REQUEST_NUM_STATES,
};
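
/*
 * Typical request lifecycles, simplified (zero-copy, DIF and error paths omitted):
 *
 *   Read:  FREE -> NEW -> NEED_BUFFER -> READY_TO_EXECUTE -> EXECUTING -> EXECUTED ->
 *          READY_TO_COMPLETE -> TRANSFERRING_CONTROLLER_TO_HOST -> COMPLETED -> FREE
 *
 *   Write (data not sent in-capsule): FREE -> NEW -> NEED_BUFFER -> AWAITING_R2T_ACK ->
 *          TRANSFERRING_HOST_TO_CONTROLLER -> READY_TO_EXECUTE -> EXECUTING -> EXECUTED ->
 *          READY_TO_COMPLETE -> TRANSFERRING_CONTROLLER_TO_HOST -> COMPLETED -> FREE
 */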

static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
	"Invalid PDU Header Field",
	"PDU Sequence Error",
	"Header Digest Error",
	"Data Transfer Out of Range",
	"R2T Limit Exceeded",
	"Unsupported parameter",
};

SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
{
	spdk_trace_register_owner(OWNER_NVMF_TCP, 't');
	spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
	spdk_trace_register_description("TCP_REQ_NEW",
					TRACE_TCP_REQUEST_STATE_NEW,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 1,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
					TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_START",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_ZCPY_START_CPL",
					TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
					TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
					TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_EXECUTING",
					TRACE_TCP_REQUEST_STATE_EXECUTING,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_WAIT_ZCPY_CMT",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_EXECUTED",
					TRACE_TCP_REQUEST_STATE_EXECUTED,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
					TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
					TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_AWAIT_ZCPY_RLS",
					TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_COMPLETED",
					TRACE_TCP_REQUEST_STATE_COMPLETED,
					OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_WRITE_START",
					TRACE_TCP_FLUSH_WRITEBUF_START,
					OWNER_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_WRITE_DONE",
					TRACE_TCP_FLUSH_WRITEBUF_DONE,
					OWNER_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_READ_DONE",
					TRACE_TCP_READ_FROM_SOCKET_DONE,
					OWNER_NVMF_TCP, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_PTR, "qpair");
	spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 182 OWNER_NVMF_TCP, OBJECT_NVMF_TCP_IO, 0, 183 SPDK_TRACE_ARG_TYPE_PTR, "qpair"); 184 185 spdk_trace_register_description("TCP_QP_CREATE", TRACE_TCP_QP_CREATE, 186 OWNER_NVMF_TCP, OBJECT_NONE, 0, 187 SPDK_TRACE_ARG_TYPE_INT, ""); 188 spdk_trace_register_description("TCP_QP_SOCK_INIT", TRACE_TCP_QP_SOCK_INIT, 189 OWNER_NVMF_TCP, OBJECT_NONE, 0, 190 SPDK_TRACE_ARG_TYPE_INT, ""); 191 spdk_trace_register_description("TCP_QP_STATE_CHANGE", TRACE_TCP_QP_STATE_CHANGE, 192 OWNER_NVMF_TCP, OBJECT_NONE, 0, 193 SPDK_TRACE_ARG_TYPE_INT, "state"); 194 spdk_trace_register_description("TCP_QP_DISCONNECT", TRACE_TCP_QP_DISCONNECT, 195 OWNER_NVMF_TCP, OBJECT_NONE, 0, 196 SPDK_TRACE_ARG_TYPE_INT, ""); 197 spdk_trace_register_description("TCP_QP_DESTROY", TRACE_TCP_QP_DESTROY, 198 OWNER_NVMF_TCP, OBJECT_NONE, 0, 199 SPDK_TRACE_ARG_TYPE_INT, ""); 200 spdk_trace_register_description("TCP_QP_ABORT_REQ", TRACE_TCP_QP_ABORT_REQ, 201 OWNER_NVMF_TCP, OBJECT_NONE, 0, 202 SPDK_TRACE_ARG_TYPE_PTR, "qpair"); 203 spdk_trace_register_description("TCP_QP_RCV_STATE_CHANGE", TRACE_TCP_QP_RCV_STATE_CHANGE, 204 OWNER_NVMF_TCP, OBJECT_NONE, 0, 205 SPDK_TRACE_ARG_TYPE_INT, "state"); 206 207 spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_NVMF_TCP_IO, 1); 208 spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_NVMF_TCP_IO, 0); 209 } 210 211 struct spdk_nvmf_tcp_req { 212 struct spdk_nvmf_request req; 213 struct spdk_nvme_cpl rsp; 214 struct spdk_nvme_cmd cmd; 215 216 /* A PDU that can be used for sending responses. This is 217 * not the incoming PDU! */ 218 struct nvme_tcp_pdu *pdu; 219 220 /* In-capsule data buffer */ 221 uint8_t *buf; 222 223 struct spdk_nvmf_tcp_req *fused_pair; 224 225 /* 226 * The PDU for a request may be used multiple times in serial over 227 * the request's lifetime. For example, first to send an R2T, then 228 * to send a completion. To catch mistakes where the PDU is used 229 * twice at the same time, add a debug flag here for init/fini. 230 */ 231 bool pdu_in_use; 232 bool has_in_capsule_data; 233 bool fused_failed; 234 235 /* transfer_tag */ 236 uint16_t ttag; 237 238 enum spdk_nvmf_tcp_req_state state; 239 240 /* 241 * h2c_offset is used when we receive the h2c_data PDU. 242 */ 243 uint32_t h2c_offset; 244 245 STAILQ_ENTRY(spdk_nvmf_tcp_req) link; 246 TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link; 247 }; 248 249 struct spdk_nvmf_tcp_qpair { 250 struct spdk_nvmf_qpair qpair; 251 struct spdk_nvmf_tcp_poll_group *group; 252 struct spdk_sock *sock; 253 254 enum nvme_tcp_pdu_recv_state recv_state; 255 enum nvme_tcp_qpair_state state; 256 257 /* PDU being actively received */ 258 struct nvme_tcp_pdu *pdu_in_progress; 259 260 struct spdk_nvmf_tcp_req *fused_first; 261 262 /* Queues to track the requests in all states */ 263 TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_working_queue; 264 TAILQ_HEAD(, spdk_nvmf_tcp_req) tcp_req_free_queue; 265 SLIST_HEAD(, nvme_tcp_pdu) tcp_pdu_free_queue; 266 267 /* Number of requests in each state */ 268 uint32_t state_cntr[TCP_REQUEST_NUM_STATES]; 269 270 uint8_t cpda; 271 272 bool host_hdgst_enable; 273 bool host_ddgst_enable; 274 275 /* This is a spare PDU used for sending special management 276 * operations. Primarily, this is used for the initial 277 * connection response and c2h termination request. */ 278 struct nvme_tcp_pdu *mgmt_pdu; 279 280 /* Arrays of in-capsule buffers, requests, and pdus. 
281 * Each array is 'resource_count' number of elements */ 282 void *bufs; 283 struct spdk_nvmf_tcp_req *reqs; 284 struct nvme_tcp_pdu *pdus; 285 uint32_t resource_count; 286 uint32_t recv_buf_size; 287 288 struct spdk_nvmf_tcp_port *port; 289 290 /* IP address */ 291 char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 292 char target_addr[SPDK_NVMF_TRADDR_MAX_LEN]; 293 294 /* IP port */ 295 uint16_t initiator_port; 296 uint16_t target_port; 297 298 /* Timer used to destroy qpair after detecting transport error issue if initiator does 299 * not close the connection. 300 */ 301 struct spdk_poller *timeout_poller; 302 303 spdk_nvmf_transport_qpair_fini_cb fini_cb_fn; 304 void *fini_cb_arg; 305 306 TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; 307 }; 308 309 struct spdk_nvmf_tcp_control_msg { 310 STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link; 311 }; 312 313 struct spdk_nvmf_tcp_control_msg_list { 314 void *msg_buf; 315 STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs; 316 }; 317 318 struct spdk_nvmf_tcp_poll_group { 319 struct spdk_nvmf_transport_poll_group group; 320 struct spdk_sock_group *sock_group; 321 322 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; 323 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req; 324 325 struct spdk_io_channel *accel_channel; 326 struct spdk_nvmf_tcp_control_msg_list *control_msg_list; 327 328 TAILQ_ENTRY(spdk_nvmf_tcp_poll_group) link; 329 }; 330 331 struct spdk_nvmf_tcp_port { 332 const struct spdk_nvme_transport_id *trid; 333 struct spdk_sock *listen_sock; 334 TAILQ_ENTRY(spdk_nvmf_tcp_port) link; 335 }; 336 337 struct tcp_transport_opts { 338 bool c2h_success; 339 uint16_t control_msg_num; 340 uint32_t sock_priority; 341 }; 342 343 struct spdk_nvmf_tcp_transport { 344 struct spdk_nvmf_transport transport; 345 struct tcp_transport_opts tcp_opts; 346 347 struct spdk_nvmf_tcp_poll_group *next_pg; 348 349 struct spdk_poller *accept_poller; 350 351 TAILQ_HEAD(, spdk_nvmf_tcp_port) ports; 352 TAILQ_HEAD(, spdk_nvmf_tcp_poll_group) poll_groups; 353 }; 354 355 static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = { 356 { 357 "c2h_success", offsetof(struct tcp_transport_opts, c2h_success), 358 spdk_json_decode_bool, true 359 }, 360 { 361 "control_msg_num", offsetof(struct tcp_transport_opts, control_msg_num), 362 spdk_json_decode_uint16, true 363 }, 364 { 365 "sock_priority", offsetof(struct tcp_transport_opts, sock_priority), 366 spdk_json_decode_uint32, true 367 }, 368 }; 369 370 static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 371 struct spdk_nvmf_tcp_req *tcp_req); 372 static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); 373 374 static void _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 375 struct spdk_nvmf_tcp_req *tcp_req); 376 377 static inline void 378 nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, 379 enum spdk_nvmf_tcp_req_state state) 380 { 381 struct spdk_nvmf_qpair *qpair; 382 struct spdk_nvmf_tcp_qpair *tqpair; 383 384 qpair = tcp_req->req.qpair; 385 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 386 387 assert(tqpair->state_cntr[tcp_req->state] > 0); 388 tqpair->state_cntr[tcp_req->state]--; 389 tqpair->state_cntr[state]++; 390 391 tcp_req->state = state; 392 } 393 394 static inline struct nvme_tcp_pdu * 395 nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req) 396 { 397 assert(tcp_req->pdu_in_use == false); 398 399 memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu)); 400 tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct 
spdk_nvmf_tcp_qpair, qpair); 401 402 return tcp_req->pdu; 403 } 404 405 static struct spdk_nvmf_tcp_req * 406 nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair) 407 { 408 struct spdk_nvmf_tcp_req *tcp_req; 409 410 tcp_req = TAILQ_FIRST(&tqpair->tcp_req_free_queue); 411 if (spdk_unlikely(!tcp_req)) { 412 return NULL; 413 } 414 415 memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp)); 416 tcp_req->h2c_offset = 0; 417 tcp_req->has_in_capsule_data = false; 418 tcp_req->req.dif_enabled = false; 419 tcp_req->req.zcopy_phase = NVMF_ZCOPY_PHASE_NONE; 420 421 TAILQ_REMOVE(&tqpair->tcp_req_free_queue, tcp_req, state_link); 422 TAILQ_INSERT_TAIL(&tqpair->tcp_req_working_queue, tcp_req, state_link); 423 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW); 424 return tcp_req; 425 } 426 427 static inline void 428 nvmf_tcp_req_put(struct spdk_nvmf_tcp_qpair *tqpair, struct spdk_nvmf_tcp_req *tcp_req) 429 { 430 assert(!tcp_req->pdu_in_use); 431 432 TAILQ_REMOVE(&tqpair->tcp_req_working_queue, tcp_req, state_link); 433 TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link); 434 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE); 435 } 436 437 static void 438 nvmf_tcp_request_free(void *cb_arg) 439 { 440 struct spdk_nvmf_tcp_transport *ttransport; 441 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 442 443 assert(tcp_req != NULL); 444 445 SPDK_DEBUGLOG(nvmf_tcp, "tcp_req=%p will be freed\n", tcp_req); 446 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 447 struct spdk_nvmf_tcp_transport, transport); 448 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 449 nvmf_tcp_req_process(ttransport, tcp_req); 450 } 451 452 static int 453 nvmf_tcp_req_free(struct spdk_nvmf_request *req) 454 { 455 struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 456 457 nvmf_tcp_request_free(tcp_req); 458 459 return 0; 460 } 461 462 static void 463 nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair, 464 enum spdk_nvmf_tcp_req_state state) 465 { 466 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 467 468 assert(state != TCP_REQUEST_STATE_FREE); 469 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) { 470 if (state == tcp_req->state) { 471 nvmf_tcp_request_free(tcp_req); 472 } 473 } 474 } 475 476 static void 477 nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair) 478 { 479 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp; 480 481 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 482 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW); 483 484 /* Wipe the requests waiting for buffer from the global list */ 485 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) { 486 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { 487 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req, 488 spdk_nvmf_request, buf_link); 489 } 490 } 491 492 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER); 493 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING); 494 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 495 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 496 } 497 498 static void 499 nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair) 500 { 501 int i; 502 struct spdk_nvmf_tcp_req *tcp_req; 503 504 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid); 505 for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) { 506 
SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]); 507 TAILQ_FOREACH(tcp_req, &tqpair->tcp_req_working_queue, state_link) { 508 if ((int)tcp_req->state == i) { 509 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool); 510 SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode); 511 } 512 } 513 } 514 } 515 516 static void 517 _nvmf_tcp_qpair_destroy(void *_tqpair) 518 { 519 struct spdk_nvmf_tcp_qpair *tqpair = _tqpair; 520 spdk_nvmf_transport_qpair_fini_cb cb_fn = tqpair->fini_cb_fn; 521 void *cb_arg = tqpair->fini_cb_arg; 522 int err = 0; 523 524 spdk_trace_record(TRACE_TCP_QP_DESTROY, 0, 0, (uintptr_t)tqpair); 525 526 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 527 528 err = spdk_sock_close(&tqpair->sock); 529 assert(err == 0); 530 nvmf_tcp_cleanup_all_states(tqpair); 531 532 if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) { 533 SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair, 534 tqpair->state_cntr[TCP_REQUEST_STATE_FREE], 535 tqpair->resource_count); 536 err++; 537 } 538 539 if (err > 0) { 540 nvmf_tcp_dump_qpair_req_contents(tqpair); 541 } 542 543 /* The timeout poller might still be registered here if we close the qpair before host 544 * terminates the connection. 545 */ 546 spdk_poller_unregister(&tqpair->timeout_poller); 547 spdk_dma_free(tqpair->pdus); 548 free(tqpair->reqs); 549 spdk_free(tqpair->bufs); 550 free(tqpair); 551 552 if (cb_fn != NULL) { 553 cb_fn(cb_arg); 554 } 555 556 SPDK_DEBUGLOG(nvmf_tcp, "Leave\n"); 557 } 558 559 static void 560 nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair) 561 { 562 /* Delay the destruction to make sure it isn't performed from the context of a sock 563 * callback. Otherwise, spdk_sock_close() might not abort pending requests, causing their 564 * completions to be executed after the qpair is freed. (Note: this fixed issue #2471.) 
565 */ 566 spdk_thread_send_msg(spdk_get_thread(), _nvmf_tcp_qpair_destroy, tqpair); 567 } 568 569 static void 570 nvmf_tcp_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w) 571 { 572 struct spdk_nvmf_tcp_transport *ttransport; 573 assert(w != NULL); 574 575 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 576 spdk_json_write_named_bool(w, "c2h_success", ttransport->tcp_opts.c2h_success); 577 spdk_json_write_named_uint32(w, "sock_priority", ttransport->tcp_opts.sock_priority); 578 } 579 580 static int 581 nvmf_tcp_destroy(struct spdk_nvmf_transport *transport, 582 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 583 { 584 struct spdk_nvmf_tcp_transport *ttransport; 585 586 assert(transport != NULL); 587 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 588 589 spdk_poller_unregister(&ttransport->accept_poller); 590 free(ttransport); 591 592 if (cb_fn) { 593 cb_fn(cb_arg); 594 } 595 return 0; 596 } 597 598 static int nvmf_tcp_accept(void *ctx); 599 600 static struct spdk_nvmf_transport * 601 nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts) 602 { 603 struct spdk_nvmf_tcp_transport *ttransport; 604 uint32_t sge_count; 605 uint32_t min_shared_buffers; 606 607 ttransport = calloc(1, sizeof(*ttransport)); 608 if (!ttransport) { 609 return NULL; 610 } 611 612 TAILQ_INIT(&ttransport->ports); 613 TAILQ_INIT(&ttransport->poll_groups); 614 615 ttransport->transport.ops = &spdk_nvmf_transport_tcp; 616 617 ttransport->tcp_opts.c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION; 618 ttransport->tcp_opts.sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY; 619 ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM; 620 if (opts->transport_specific != NULL && 621 spdk_json_decode_object_relaxed(opts->transport_specific, tcp_transport_opts_decoder, 622 SPDK_COUNTOF(tcp_transport_opts_decoder), 623 &ttransport->tcp_opts)) { 624 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 625 free(ttransport); 626 return NULL; 627 } 628 629 SPDK_NOTICELOG("*** TCP Transport Init ***\n"); 630 631 SPDK_INFOLOG(nvmf_tcp, "*** TCP Transport Init ***\n" 632 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" 633 " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" 634 " in_capsule_data_size=%d, max_aq_depth=%d\n" 635 " num_shared_buffers=%d, c2h_success=%d,\n" 636 " dif_insert_or_strip=%d, sock_priority=%d\n" 637 " abort_timeout_sec=%d, control_msg_num=%hu\n", 638 opts->max_queue_depth, 639 opts->max_io_size, 640 opts->max_qpairs_per_ctrlr - 1, 641 opts->io_unit_size, 642 opts->in_capsule_data_size, 643 opts->max_aq_depth, 644 opts->num_shared_buffers, 645 ttransport->tcp_opts.c2h_success, 646 opts->dif_insert_or_strip, 647 ttransport->tcp_opts.sock_priority, 648 opts->abort_timeout_sec, 649 ttransport->tcp_opts.control_msg_num); 650 651 if (ttransport->tcp_opts.sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) { 652 SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n" 653 "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n", 654 ttransport->tcp_opts.sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY); 655 free(ttransport); 656 return NULL; 657 } 658 659 if (ttransport->tcp_opts.control_msg_num == 0 && 660 opts->in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { 661 SPDK_WARNLOG("TCP param control_msg_num can't be 0 if ICD is less than %u bytes. 

static struct spdk_nvmf_transport *
nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
{
	struct spdk_nvmf_tcp_transport *ttransport;
	uint32_t sge_count;
	uint32_t min_shared_buffers;

	ttransport = calloc(1, sizeof(*ttransport));
	if (!ttransport) {
		return NULL;
	}

	TAILQ_INIT(&ttransport->ports);
	TAILQ_INIT(&ttransport->poll_groups);

	ttransport->transport.ops = &spdk_nvmf_transport_tcp;

	ttransport->tcp_opts.c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
	ttransport->tcp_opts.sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
	ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
	if (opts->transport_specific != NULL &&
	    spdk_json_decode_object_relaxed(opts->transport_specific, tcp_transport_opts_decoder,
					    SPDK_COUNTOF(tcp_transport_opts_decoder),
					    &ttransport->tcp_opts)) {
		SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n");
		free(ttransport);
		return NULL;
	}

	SPDK_NOTICELOG("*** TCP Transport Init ***\n");

	SPDK_INFOLOG(nvmf_tcp, "*** TCP Transport Init ***\n"
		     "  Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
		     "  max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
		     "  num_shared_buffers=%d, c2h_success=%d,\n"
		     "  dif_insert_or_strip=%d, sock_priority=%d\n"
		     "  abort_timeout_sec=%d, control_msg_num=%hu\n",
		     opts->max_queue_depth,
		     opts->max_io_size,
		     opts->max_qpairs_per_ctrlr - 1,
		     opts->io_unit_size,
		     opts->in_capsule_data_size,
		     opts->max_aq_depth,
		     opts->num_shared_buffers,
		     ttransport->tcp_opts.c2h_success,
		     opts->dif_insert_or_strip,
		     ttransport->tcp_opts.sock_priority,
		     opts->abort_timeout_sec,
		     ttransport->tcp_opts.control_msg_num);

	if (ttransport->tcp_opts.sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
		SPDK_ERRLOG("Unsupported socket_priority=%d, the current range is: 0 to %d\n"
			    "you can use man 7 socket to view the range of priority under SO_PRIORITY item\n",
			    ttransport->tcp_opts.sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
		free(ttransport);
		return NULL;
	}

	if (ttransport->tcp_opts.control_msg_num == 0 &&
	    opts->in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		SPDK_WARNLOG("TCP param control_msg_num can't be 0 if ICD is less than %u bytes. Using default value %u\n",
			     SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM);
		ttransport->tcp_opts.control_msg_num = SPDK_NVMF_TCP_DEFAULT_CONTROL_MSG_NUM;
	}

	/* I/O unit size cannot be larger than max I/O size */
	if (opts->io_unit_size > opts->max_io_size) {
		SPDK_WARNLOG("TCP param io_unit_size %u can't be larger than max_io_size %u. Using max_io_size as io_unit_size\n",
			     opts->io_unit_size, opts->max_io_size);
		opts->io_unit_size = opts->max_io_size;
	}

	/* In capsule data size cannot be larger than max I/O size */
	if (opts->in_capsule_data_size > opts->max_io_size) {
		SPDK_WARNLOG("TCP param ICD size %u can't be larger than max_io_size %u. Using max_io_size as ICD size\n",
			     opts->in_capsule_data_size, opts->max_io_size);
		opts->in_capsule_data_size = opts->max_io_size;
	}

	/* max IO queue depth cannot be smaller than 2 or larger than 65535.
	 * We do not check SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH here, because max_queue_depth is
	 * 16 bits and therefore never larger than 64k. */
	if (opts->max_queue_depth < SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH) {
		SPDK_WARNLOG("TCP param max_queue_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
			     opts->max_queue_depth, SPDK_NVMF_TCP_MIN_IO_QUEUE_DEPTH,
			     SPDK_NVMF_TCP_MAX_IO_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH);
		opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH;
	}

	/* max admin queue depth cannot be smaller than 2 or larger than 4096 */
	if (opts->max_aq_depth < SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH ||
	    opts->max_aq_depth > SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH) {
		SPDK_WARNLOG("TCP param max_aq_depth %u can't be smaller than %u or larger than %u. Using default value %u\n",
			     opts->max_aq_depth, SPDK_NVMF_TCP_MIN_ADMIN_QUEUE_DEPTH,
			     SPDK_NVMF_TCP_MAX_ADMIN_QUEUE_DEPTH, SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH);
		opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH;
	}

	sge_count = opts->max_io_size / opts->io_unit_size;
	if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
		SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
		free(ttransport);
		return NULL;
	}

	min_shared_buffers = spdk_env_get_core_count() * opts->buf_cache_size;
	if (min_shared_buffers > opts->num_shared_buffers) {
		SPDK_ERRLOG("There are not enough buffers to satisfy "
			    "per-poll group caches for each thread. (%" PRIu32 ") "
			    "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
		SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
		free(ttransport);
		return NULL;
	}

	ttransport->accept_poller = SPDK_POLLER_REGISTER(nvmf_tcp_accept, &ttransport->transport,
				    opts->acceptor_poll_rate);
	if (!ttransport->accept_poller) {
		free(ttransport);
		return NULL;
	}

	return &ttransport->transport;
}

static int
nvmf_tcp_trsvcid_to_int(const char *trsvcid)
{
	unsigned long long ull;
	char *end = NULL;

	ull = strtoull(trsvcid, &end, 10);
	if (end == NULL || end == trsvcid || *end != '\0') {
		return -1;
	}

	/* Valid TCP/IP port numbers are in [0, 65535] */
	if (ull > 65535) {
		return -1;
	}

	return (int)ull;
}

/**
 * Canonicalize a listen address trid.
746 */ 747 static int 748 nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid, 749 const struct spdk_nvme_transport_id *trid) 750 { 751 int trsvcid_int; 752 753 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); 754 if (trsvcid_int < 0) { 755 return -EINVAL; 756 } 757 758 memset(canon_trid, 0, sizeof(*canon_trid)); 759 spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP); 760 canon_trid->adrfam = trid->adrfam; 761 snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr); 762 snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int); 763 764 return 0; 765 } 766 767 /** 768 * Find an existing listening port. 769 */ 770 static struct spdk_nvmf_tcp_port * 771 nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport, 772 const struct spdk_nvme_transport_id *trid) 773 { 774 struct spdk_nvme_transport_id canon_trid; 775 struct spdk_nvmf_tcp_port *port; 776 777 if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) { 778 return NULL; 779 } 780 781 TAILQ_FOREACH(port, &ttransport->ports, link) { 782 if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) { 783 return port; 784 } 785 } 786 787 return NULL; 788 } 789 790 static int 791 nvmf_tcp_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid, 792 struct spdk_nvmf_listen_opts *listen_opts) 793 { 794 struct spdk_nvmf_tcp_transport *ttransport; 795 struct spdk_nvmf_tcp_port *port; 796 int trsvcid_int; 797 uint8_t adrfam; 798 struct spdk_sock_opts opts; 799 800 if (!strlen(trid->trsvcid)) { 801 SPDK_ERRLOG("Service id is required\n"); 802 return -EINVAL; 803 } 804 805 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 806 807 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid); 808 if (trsvcid_int < 0) { 809 SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid); 810 return -EINVAL; 811 } 812 813 port = calloc(1, sizeof(*port)); 814 if (!port) { 815 SPDK_ERRLOG("Port allocation failed\n"); 816 return -ENOMEM; 817 } 818 819 port->trid = trid; 820 opts.opts_size = sizeof(opts); 821 spdk_sock_get_default_opts(&opts); 822 opts.priority = ttransport->tcp_opts.sock_priority; 823 /* TODO: also add impl_opts like on the initiator side */ 824 port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int, 825 NULL, &opts); 826 if (port->listen_sock == NULL) { 827 SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n", 828 trid->traddr, trsvcid_int, 829 spdk_strerror(errno), errno); 830 free(port); 831 return -errno; 832 } 833 834 if (spdk_sock_is_ipv4(port->listen_sock)) { 835 adrfam = SPDK_NVMF_ADRFAM_IPV4; 836 } else if (spdk_sock_is_ipv6(port->listen_sock)) { 837 adrfam = SPDK_NVMF_ADRFAM_IPV6; 838 } else { 839 SPDK_ERRLOG("Unhandled socket type\n"); 840 adrfam = 0; 841 } 842 843 if (adrfam != trid->adrfam) { 844 SPDK_ERRLOG("Socket address family mismatch\n"); 845 spdk_sock_close(&port->listen_sock); 846 free(port); 847 return -EINVAL; 848 } 849 850 SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n", 851 trid->traddr, trid->trsvcid); 852 853 TAILQ_INSERT_TAIL(&ttransport->ports, port, link); 854 return 0; 855 } 856 857 static void 858 nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport, 859 const struct spdk_nvme_transport_id *trid) 860 { 861 struct spdk_nvmf_tcp_transport *ttransport; 862 struct spdk_nvmf_tcp_port *port; 863 864 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 865 866 SPDK_DEBUGLOG(nvmf_tcp, "Removing listen 
address %s port %s\n", 867 trid->traddr, trid->trsvcid); 868 869 port = nvmf_tcp_find_port(ttransport, trid); 870 if (port) { 871 TAILQ_REMOVE(&ttransport->ports, port, link); 872 spdk_sock_close(&port->listen_sock); 873 free(port); 874 } 875 } 876 877 static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 878 enum nvme_tcp_pdu_recv_state state); 879 880 static void 881 nvmf_tcp_qpair_set_state(struct spdk_nvmf_tcp_qpair *tqpair, enum nvme_tcp_qpair_state state) 882 { 883 tqpair->state = state; 884 spdk_trace_record(TRACE_TCP_QP_STATE_CHANGE, tqpair->qpair.qid, 0, (uintptr_t)tqpair, 885 tqpair->state); 886 } 887 888 static void 889 nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair) 890 { 891 SPDK_DEBUGLOG(nvmf_tcp, "Disconnecting qpair %p\n", tqpair); 892 893 spdk_trace_record(TRACE_TCP_QP_DISCONNECT, 0, 0, (uintptr_t)tqpair); 894 895 if (tqpair->state <= NVME_TCP_QPAIR_STATE_RUNNING) { 896 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_EXITING); 897 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 898 spdk_poller_unregister(&tqpair->timeout_poller); 899 900 /* This will end up calling nvmf_tcp_close_qpair */ 901 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL); 902 } 903 } 904 905 static void 906 _mgmt_pdu_write_done(void *_tqpair, int err) 907 { 908 struct spdk_nvmf_tcp_qpair *tqpair = _tqpair; 909 struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu; 910 911 if (spdk_unlikely(err != 0)) { 912 nvmf_tcp_qpair_disconnect(tqpair); 913 return; 914 } 915 916 assert(pdu->cb_fn != NULL); 917 pdu->cb_fn(pdu->cb_arg); 918 } 919 920 static void 921 _req_pdu_write_done(void *req, int err) 922 { 923 struct spdk_nvmf_tcp_req *tcp_req = req; 924 struct nvme_tcp_pdu *pdu = tcp_req->pdu; 925 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 926 927 assert(tcp_req->pdu_in_use); 928 tcp_req->pdu_in_use = false; 929 930 /* If the request is in a completed state, we're waiting for write completion to free it */ 931 if (spdk_unlikely(tcp_req->state == TCP_REQUEST_STATE_COMPLETED)) { 932 nvmf_tcp_request_free(tcp_req); 933 return; 934 } 935 936 if (spdk_unlikely(err != 0)) { 937 nvmf_tcp_qpair_disconnect(tqpair); 938 return; 939 } 940 941 assert(pdu->cb_fn != NULL); 942 pdu->cb_fn(pdu->cb_arg); 943 } 944 945 static void 946 _pdu_write_done(struct nvme_tcp_pdu *pdu, int err) 947 { 948 pdu->sock_req.cb_fn(pdu->sock_req.cb_arg, err); 949 } 950 951 static void 952 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 953 { 954 uint32_t mapped_length = 0; 955 ssize_t rc; 956 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 957 958 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, 959 tqpair->host_hdgst_enable, tqpair->host_ddgst_enable, 960 &mapped_length); 961 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP || 962 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) { 963 rc = spdk_sock_writev(tqpair->sock, pdu->iov, pdu->sock_req.iovcnt); 964 if (rc == mapped_length) { 965 _pdu_write_done(pdu, 0); 966 } else { 967 SPDK_ERRLOG("IC_RESP or TERM_REQ could not write to socket.\n"); 968 _pdu_write_done(pdu, -1); 969 } 970 } else { 971 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 972 } 973 } 974 975 static void 976 data_crc32_accel_done(void *cb_arg, int status) 977 { 978 struct nvme_tcp_pdu *pdu = cb_arg; 979 980 if (spdk_unlikely(status)) { 981 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 982 _pdu_write_done(pdu, status); 983 return; 984 } 985 986 pdu->data_digest_crc32 ^= 
SPDK_CRC32C_XOR;
	MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);

	_tcp_write_pdu(pdu);
}

static void
pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
	int rc = 0;

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
		/* Only support this limited case for the first step */
		if (spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)
				&& tqpair->group)) {
			rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov,
						       pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
			if (spdk_likely(rc == 0)) {
				return;
			}
		} else {
			pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu);
		}
		data_crc32_accel_done(pdu, rc);
	} else {
		_tcp_write_pdu(pdu);
	}
}

static void
nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	assert(tqpair->pdu_in_progress != pdu);

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;

	pdu->iov[0].iov_base = &pdu->hdr.raw;
	pdu->iov[0].iov_len = hlen;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	/* Data Digest */
	pdu_data_crc32_compute(pdu);
}

static void
nvmf_tcp_qpair_write_mgmt_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			      nvme_tcp_qpair_xfer_complete_cb cb_fn,
			      void *cb_arg)
{
	struct nvme_tcp_pdu *pdu = tqpair->mgmt_pdu;

	pdu->sock_req.cb_fn = _mgmt_pdu_write_done;
	pdu->sock_req.cb_arg = tqpair;

	nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
}

static void
nvmf_tcp_qpair_write_req_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
			     struct spdk_nvmf_tcp_req *tcp_req,
			     nvme_tcp_qpair_xfer_complete_cb cb_fn,
			     void *cb_arg)
{
	struct nvme_tcp_pdu *pdu = tcp_req->pdu;

	pdu->sock_req.cb_fn = _req_pdu_write_done;
	pdu->sock_req.cb_arg = tcp_req;

	assert(!tcp_req->pdu_in_use);
	tcp_req->pdu_in_use = true;

	nvmf_tcp_qpair_write_pdu(tqpair, pdu, cb_fn, cb_arg);
}
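
/*
 * Per-qpair resources are sized by max_queue_depth ("resource_count"): one
 * spdk_nvmf_tcp_req and one in-capsule data buffer per request, plus
 * 2 * resource_count + 1 PDUs - one response PDU per request, resource_count
 * PDUs for the receive free list, and one spare PDU reserved as the qpair's
 * mgmt_pdu.
 */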

static int
nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
{
	uint32_t i;
	struct spdk_nvmf_transport_opts *opts;
	uint32_t in_capsule_data_size;

	opts = &tqpair->qpair.transport->opts;

	in_capsule_data_size = opts->in_capsule_data_size;
	if (opts->dif_insert_or_strip) {
		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
	}

	tqpair->resource_count = opts->max_queue_depth;

	tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
	if (!tqpair->reqs) {
		SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
		return -1;
	}

	if (in_capsule_data_size) {
		tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000,
					    NULL, SPDK_ENV_LCORE_ID_ANY,
					    SPDK_MALLOC_DMA);
		if (!tqpair->bufs) {
			SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
			return -1;
		}
	}
	/* Prepare memory space for receiving PDUs and tcp_reqs */
	/* Add one additional member, which will be used for the mgmt_pdu owned by the tqpair */
	tqpair->pdus = spdk_dma_zmalloc((2 * tqpair->resource_count + 1) * sizeof(*tqpair->pdus), 0x1000,
					NULL);
	if (!tqpair->pdus) {
		SPDK_ERRLOG("Unable to allocate pdu pool on tqpair=%p.\n", tqpair);
		return -1;
	}

	for (i = 0; i < tqpair->resource_count; i++) {
		struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];

		tcp_req->ttag = i + 1;
		tcp_req->req.qpair = &tqpair->qpair;

		tcp_req->pdu = &tqpair->pdus[i];
		tcp_req->pdu->qpair = tqpair;

		/* Set up memory to receive commands */
		if (tqpair->bufs) {
			tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
		}

		/* Set the cmd and rsp */
		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;

		tcp_req->req.stripped_data = NULL;

		/* Initialize request state to FREE */
		tcp_req->state = TCP_REQUEST_STATE_FREE;
		TAILQ_INSERT_TAIL(&tqpair->tcp_req_free_queue, tcp_req, state_link);
		tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
	}

	for (; i < 2 * tqpair->resource_count; i++) {
		struct nvme_tcp_pdu *pdu = &tqpair->pdus[i];

		pdu->qpair = tqpair;
		SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist);
	}

	tqpair->mgmt_pdu = &tqpair->pdus[i];
	tqpair->mgmt_pdu->qpair = tqpair;
	tqpair->pdu_in_progress = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
	SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);

	tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
				 SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;

	return 0;
}

static int
nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_tcp_qpair *tqpair;

	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);

	SPDK_DEBUGLOG(nvmf_tcp, "New TCP Connection: %p\n", qpair);

	spdk_trace_record(TRACE_TCP_QP_CREATE, 0, 0, (uintptr_t)tqpair);

	/* Initialise request state queues of the qpair */
	TAILQ_INIT(&tqpair->tcp_req_free_queue);
	TAILQ_INIT(&tqpair->tcp_req_working_queue);
	SLIST_INIT(&tqpair->tcp_pdu_free_queue);

	tqpair->host_hdgst_enable = true;
	tqpair->host_ddgst_enable = true;

	return 0;
}

static int
nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
{
	int rc;

	spdk_trace_record(TRACE_TCP_QP_SOCK_INIT, 0, 0, (uintptr_t)tqpair);

	/* set low water mark */
	rc = spdk_sock_set_recvlowat(tqpair->sock, 1);
	if (rc != 0) {
		SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
		return rc;
	}

	return 0;
}

static void
nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
			struct spdk_nvmf_tcp_port *port,
			struct spdk_sock *sock)
{
	struct spdk_nvmf_tcp_qpair *tqpair;
	int rc;

	SPDK_DEBUGLOG(nvmf_tcp, "New connection accepted on %s port %s\n",
		      port->trid->traddr, port->trid->trsvcid);

	tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
	if (tqpair == NULL) {
		SPDK_ERRLOG("Could not allocate new connection.\n");
		spdk_sock_close(&sock);
		return;
	}

	tqpair->sock = sock;
	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
	tqpair->port = port;
	tqpair->qpair.transport = transport;

	rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
			       sizeof(tqpair->target_addr), &tqpair->target_port,
			       tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
			       &tqpair->initiator_port);
	if (rc < 0) {
		SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
		nvmf_tcp_qpair_destroy(tqpair);
		return;
	}

	spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair);
}

static uint32_t
nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port)
{
	struct spdk_sock *sock;
	uint32_t count = 0;
	int i;

	for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
		sock = spdk_sock_accept(port->listen_sock);
		if (sock == NULL) {
			break;
		}
		count++;
		nvmf_tcp_handle_connect(transport, port, sock);
	}

	return count;
}

static int
nvmf_tcp_accept(void *ctx)
{
	struct spdk_nvmf_transport *transport = ctx;
	struct spdk_nvmf_tcp_transport *ttransport;
	struct spdk_nvmf_tcp_port *port;
	uint32_t count = 0;

	ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);

	TAILQ_FOREACH(port, &ttransport->ports, link) {
		count += nvmf_tcp_port_accept(transport, port);
	}

	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static void
nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
		  struct spdk_nvme_transport_id *trid,
		  struct spdk_nvmf_discovery_log_page_entry *entry)
{
	entry->trtype = SPDK_NVMF_TRTYPE_TCP;
	entry->adrfam = trid->adrfam;
	entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;

	spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
	spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');

	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
}
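
/*
 * Control message buffers back admin and fabrics commands whose in-capsule data
 * would not fit in the transport's regular in-capsule buffers. Each message is
 * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE bytes, and the list is only created when
 * in_capsule_data_size is smaller than that (see nvmf_tcp_poll_group_create below).
 */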

static struct spdk_nvmf_tcp_control_msg_list *
nvmf_tcp_control_msg_list_create(uint16_t num_messages)
{
	struct spdk_nvmf_tcp_control_msg_list *list;
	struct spdk_nvmf_tcp_control_msg *msg;
	uint16_t i;

	list = calloc(1, sizeof(*list));
	if (!list) {
		SPDK_ERRLOG("Failed to allocate memory for list structure\n");
		return NULL;
	}

	list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE,
				     NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (!list->msg_buf) {
		SPDK_ERRLOG("Failed to allocate memory for control message buffers\n");
		free(list);
		return NULL;
	}

	STAILQ_INIT(&list->free_msgs);

	for (i = 0; i < num_messages; i++) {
		msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i *
				SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		STAILQ_INSERT_TAIL(&list->free_msgs, msg, link);
	}

	return list;
}

static void
nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list)
{
	if (!list) {
		return;
	}

	spdk_free(list->msg_buf);
	free(list);
}

static struct spdk_nvmf_transport_poll_group *
nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport,
			   struct spdk_nvmf_poll_group *group)
{
	struct spdk_nvmf_tcp_transport *ttransport;
1333 struct spdk_nvmf_tcp_poll_group *tgroup; 1334 1335 tgroup = calloc(1, sizeof(*tgroup)); 1336 if (!tgroup) { 1337 return NULL; 1338 } 1339 1340 tgroup->sock_group = spdk_sock_group_create(&tgroup->group); 1341 if (!tgroup->sock_group) { 1342 goto cleanup; 1343 } 1344 1345 TAILQ_INIT(&tgroup->qpairs); 1346 TAILQ_INIT(&tgroup->await_req); 1347 1348 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport); 1349 1350 if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { 1351 SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). " 1352 "Creating control messages list\n", transport->opts.in_capsule_data_size, 1353 SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); 1354 tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(ttransport->tcp_opts.control_msg_num); 1355 if (!tgroup->control_msg_list) { 1356 goto cleanup; 1357 } 1358 } 1359 1360 tgroup->accel_channel = spdk_accel_get_io_channel(); 1361 if (spdk_unlikely(!tgroup->accel_channel)) { 1362 SPDK_ERRLOG("Cannot create accel_channel for tgroup=%p\n", tgroup); 1363 goto cleanup; 1364 } 1365 1366 TAILQ_INSERT_TAIL(&ttransport->poll_groups, tgroup, link); 1367 if (ttransport->next_pg == NULL) { 1368 ttransport->next_pg = tgroup; 1369 } 1370 1371 return &tgroup->group; 1372 1373 cleanup: 1374 nvmf_tcp_poll_group_destroy(&tgroup->group); 1375 return NULL; 1376 } 1377 1378 static struct spdk_nvmf_transport_poll_group * 1379 nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair) 1380 { 1381 struct spdk_nvmf_tcp_transport *ttransport; 1382 struct spdk_nvmf_tcp_poll_group **pg; 1383 struct spdk_nvmf_tcp_qpair *tqpair; 1384 struct spdk_sock_group *group = NULL, *hint = NULL; 1385 int rc; 1386 1387 ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport); 1388 1389 if (TAILQ_EMPTY(&ttransport->poll_groups)) { 1390 return NULL; 1391 } 1392 1393 pg = &ttransport->next_pg; 1394 assert(*pg != NULL); 1395 hint = (*pg)->sock_group; 1396 1397 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 1398 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, hint); 1399 if (rc != 0) { 1400 return NULL; 1401 } else if (group != NULL) { 1402 /* Optimal poll group was found */ 1403 return spdk_sock_group_get_ctx(group); 1404 } 1405 1406 /* The hint was used for optimal poll group, advance next_pg. 
*/ 1407 *pg = TAILQ_NEXT(*pg, link); 1408 if (*pg == NULL) { 1409 *pg = TAILQ_FIRST(&ttransport->poll_groups); 1410 } 1411 1412 return spdk_sock_group_get_ctx(hint); 1413 } 1414 1415 static void 1416 nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1417 { 1418 struct spdk_nvmf_tcp_poll_group *tgroup, *next_tgroup; 1419 struct spdk_nvmf_tcp_transport *ttransport; 1420 1421 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 1422 spdk_sock_group_close(&tgroup->sock_group); 1423 if (tgroup->control_msg_list) { 1424 nvmf_tcp_control_msg_list_free(tgroup->control_msg_list); 1425 } 1426 1427 if (tgroup->accel_channel) { 1428 spdk_put_io_channel(tgroup->accel_channel); 1429 } 1430 1431 ttransport = SPDK_CONTAINEROF(tgroup->group.transport, struct spdk_nvmf_tcp_transport, transport); 1432 1433 next_tgroup = TAILQ_NEXT(tgroup, link); 1434 TAILQ_REMOVE(&ttransport->poll_groups, tgroup, link); 1435 if (next_tgroup == NULL) { 1436 next_tgroup = TAILQ_FIRST(&ttransport->poll_groups); 1437 } 1438 if (ttransport->next_pg == tgroup) { 1439 ttransport->next_pg = next_tgroup; 1440 } 1441 1442 free(tgroup); 1443 } 1444 1445 static void 1446 nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair, 1447 enum nvme_tcp_pdu_recv_state state) 1448 { 1449 if (tqpair->recv_state == state) { 1450 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 1451 tqpair, state); 1452 return; 1453 } 1454 1455 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 1456 /* When leaving the await req state, move the qpair to the main list */ 1457 TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link); 1458 TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link); 1459 } else if (state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 1460 TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link); 1461 TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link); 1462 } 1463 1464 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv state=%d\n", tqpair, state); 1465 tqpair->recv_state = state; 1466 1467 spdk_trace_record(TRACE_TCP_QP_RCV_STATE_CHANGE, tqpair->qpair.qid, 0, (uintptr_t)tqpair, 1468 tqpair->recv_state); 1469 } 1470 1471 static int 1472 nvmf_tcp_qpair_handle_timeout(void *ctx) 1473 { 1474 struct spdk_nvmf_tcp_qpair *tqpair = ctx; 1475 1476 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR); 1477 1478 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair, 1479 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT); 1480 1481 nvmf_tcp_qpair_disconnect(tqpair); 1482 return SPDK_POLLER_BUSY; 1483 } 1484 1485 static void 1486 nvmf_tcp_send_c2h_term_req_complete(void *cb_arg) 1487 { 1488 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg; 1489 1490 if (!tqpair->timeout_poller) { 1491 tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair, 1492 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000); 1493 } 1494 } 1495 1496 static void 1497 nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1498 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1499 { 1500 struct nvme_tcp_pdu *rsp_pdu; 1501 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req; 1502 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req); 1503 uint32_t copy_len; 1504 1505 rsp_pdu = tqpair->mgmt_pdu; 1506 1507 c2h_term_req = &rsp_pdu->hdr.term_req; 1508 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ; 1509 c2h_term_req->common.hlen = c2h_term_req_hdr_len; 1510 c2h_term_req->fes = fes; 1511 1512 if 
((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		DSET32(&c2h_term_req->fei, error_offset);
	}

	copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE);

	/* Copy the error info into the buffer */
	memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len);
	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len);

	/* Include the header of the offending received pdu */
	c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
	nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
	nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_c2h_term_req_complete, tqpair);
}

static void
nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
				struct spdk_nvmf_tcp_qpair *tqpair,
				struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;

	assert(pdu->psh_valid_bytes == pdu->psh_len);
	assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD);

	tcp_req = nvmf_tcp_req_get(tqpair);
	if (!tcp_req) {
		/* Directly return and make the allocation retry again. This can happen if we're
		 * using asynchronous writes to send the response to the host or when releasing
		 * zero-copy buffers after a response has been sent. In both cases, the host might
		 * receive the response before we've finished processing the request and is free to
		 * send another one.
		 */
		if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0 ||
		    tqpair->state_cntr[TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE] > 0) {
			return;
		}

		/* The host sent more commands than the maximum queue depth. */
		SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
		nvmf_tcp_qpair_disconnect(tqpair);
		return;
	}

	pdu->req = tcp_req;
	assert(tcp_req->state == TCP_REQUEST_STATE_NEW);
	nvmf_tcp_req_process(ttransport, tcp_req);
}

static void
nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
				    struct spdk_nvmf_tcp_qpair *tqpair,
				    struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	struct spdk_nvme_tcp_cmd *capsule_cmd;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	struct spdk_nvme_cpl *rsp;

	capsule_cmd = &pdu->hdr.capsule_cmd;
	tcp_req = pdu->req;
	assert(tcp_req != NULL);

	/* Zero-copy requests don't support ICD */
	assert(!spdk_nvmf_request_using_zcopy(&tcp_req->req));

	if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
		SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %u\n",
			    SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
		goto err;
	}

	rsp = &tcp_req->req.rsp->nvme_cpl;
	if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) {
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
	} else {
		nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
	}

	nvmf_tcp_req_process(ttransport, tcp_req);

	return;
err:
	nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
			     struct spdk_nvmf_tcp_qpair *tqpair,
			     struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvmf_tcp_req *tcp_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes = 0;
	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;

	h2c_data = &pdu->hdr.h2c_data;

	SPDK_DEBUGLOG(nvmf_tcp, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
		      tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);

	if (h2c_data->ttag > tqpair->resource_count) {
		SPDK_DEBUGLOG(nvmf_tcp, "ttag %u is larger than allowed %u.\n", h2c_data->ttag,
			      tqpair->resource_count);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
		goto err;
	}

	tcp_req = &tqpair->reqs[h2c_data->ttag - 1];

	if (spdk_unlikely(tcp_req->state != TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER &&
			  tcp_req->state != TCP_REQUEST_STATE_AWAITING_R2T_ACK)) {
		SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, is in unexpected state %d\n", tcp_req, tqpair,
			      tcp_req->state);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
		goto err;
	}

	if (spdk_unlikely(tcp_req->req.cmd->nvme_cmd.cid != h2c_data->cccid)) {
		SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, expected cccid %u but got %u.\n", tcp_req, tqpair,
			      tcp_req->req.cmd->nvme_cmd.cid, h2c_data->cccid);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
		error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
		goto err;
	}

	if (tcp_req->h2c_offset != h2c_data->datao) {
		SPDK_DEBUGLOG(nvmf_tcp,
"tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n", 1648 tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao); 1649 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1650 goto err; 1651 } 1652 1653 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) { 1654 SPDK_DEBUGLOG(nvmf_tcp, 1655 "tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n", 1656 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length); 1657 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1658 goto err; 1659 } 1660 1661 pdu->req = tcp_req; 1662 1663 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 1664 pdu->dif_ctx = &tcp_req->req.dif.dif_ctx; 1665 } 1666 1667 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 1668 h2c_data->datao, h2c_data->datal); 1669 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1670 return; 1671 1672 err: 1673 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1674 } 1675 1676 static void 1677 nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req, 1678 struct spdk_nvmf_tcp_qpair *tqpair) 1679 { 1680 struct nvme_tcp_pdu *rsp_pdu; 1681 struct spdk_nvme_tcp_rsp *capsule_resp; 1682 1683 SPDK_DEBUGLOG(nvmf_tcp, "enter, tqpair=%p\n", tqpair); 1684 1685 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 1686 assert(rsp_pdu != NULL); 1687 1688 capsule_resp = &rsp_pdu->hdr.capsule_resp; 1689 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1690 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp); 1691 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl; 1692 if (tqpair->host_hdgst_enable) { 1693 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1694 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1695 } 1696 1697 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_request_free, tcp_req); 1698 } 1699 1700 static void 1701 nvmf_tcp_pdu_c2h_data_complete(void *cb_arg) 1702 { 1703 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1704 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, 1705 struct spdk_nvmf_tcp_qpair, qpair); 1706 1707 assert(tqpair != NULL); 1708 1709 if (spdk_unlikely(tcp_req->pdu->rw_offset < tcp_req->req.length)) { 1710 SPDK_DEBUGLOG(nvmf_tcp, "sending another C2H part, offset %u length %u\n", tcp_req->pdu->rw_offset, 1711 tcp_req->req.length); 1712 _nvmf_tcp_send_c2h_data(tqpair, tcp_req); 1713 return; 1714 } 1715 1716 if (tcp_req->pdu->hdr.c2h_data.common.flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 1717 nvmf_tcp_request_free(tcp_req); 1718 } else { 1719 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 1720 } 1721 } 1722 1723 static void 1724 nvmf_tcp_r2t_complete(void *cb_arg) 1725 { 1726 struct spdk_nvmf_tcp_req *tcp_req = cb_arg; 1727 struct spdk_nvmf_tcp_transport *ttransport; 1728 1729 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport, 1730 struct spdk_nvmf_tcp_transport, transport); 1731 1732 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 1733 1734 if (tcp_req->h2c_offset == tcp_req->req.length) { 1735 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1736 nvmf_tcp_req_process(ttransport, tcp_req); 1737 } 1738 } 1739 1740 static void 1741 nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair, 1742 struct spdk_nvmf_tcp_req *tcp_req) 1743 { 1744 struct nvme_tcp_pdu *rsp_pdu; 1745 struct spdk_nvme_tcp_r2t_hdr *r2t; 1746 1747 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req); 1748 
assert(rsp_pdu != NULL); 1749 1750 r2t = &rsp_pdu->hdr.r2t; 1751 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T; 1752 r2t->common.plen = r2t->common.hlen = sizeof(*r2t); 1753 1754 if (tqpair->host_hdgst_enable) { 1755 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1756 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN; 1757 } 1758 1759 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid; 1760 r2t->ttag = tcp_req->ttag; 1761 r2t->r2to = tcp_req->h2c_offset; 1762 r2t->r2tl = tcp_req->req.length; 1763 1764 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK); 1765 1766 SPDK_DEBUGLOG(nvmf_tcp, 1767 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n", 1768 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl); 1769 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_r2t_complete, tcp_req); 1770 } 1771 1772 static void 1773 nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport, 1774 struct spdk_nvmf_tcp_qpair *tqpair, 1775 struct nvme_tcp_pdu *pdu) 1776 { 1777 struct spdk_nvmf_tcp_req *tcp_req; 1778 struct spdk_nvme_cpl *rsp; 1779 1780 tcp_req = pdu->req; 1781 assert(tcp_req != NULL); 1782 1783 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 1784 1785 tcp_req->h2c_offset += pdu->data_len; 1786 1787 /* Wait for all of the data to arrive AND for the initial R2T PDU send to be 1788 * acknowledged before moving on. */ 1789 if (tcp_req->h2c_offset == tcp_req->req.length && 1790 tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) { 1791 /* After receiving all the h2c data, we need to check whether there is 1792 * transient transport error */ 1793 rsp = &tcp_req->req.rsp->nvme_cpl; 1794 if (spdk_unlikely(rsp->status.sc == SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR)) { 1795 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 1796 } else { 1797 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 1798 } 1799 nvmf_tcp_req_process(ttransport, tcp_req); 1800 } 1801 } 1802 1803 static void 1804 nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req) 1805 { 1806 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req, 1807 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]); 1808 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1809 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1810 SPDK_DEBUGLOG(nvmf_tcp, "The offset from the start of the PDU header is %u\n", 1811 DGET32(h2c_term_req->fei)); 1812 } 1813 } 1814 1815 static void 1816 nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1817 struct nvme_tcp_pdu *pdu) 1818 { 1819 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1820 uint32_t error_offset = 0; 1821 enum spdk_nvme_tcp_term_req_fes fes; 1822 1823 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1824 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for h2c_term_req pdu=%p\n", pdu); 1825 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1826 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1827 goto end; 1828 } 1829 1830 /* set the data buffer */ 1831 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen, 1832 h2c_term_req->common.plen - h2c_term_req->common.hlen); 1833 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1834 return; 1835 end: 1836 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 1837 } 1838 1839 static void 1840 
nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, 1841 struct nvme_tcp_pdu *pdu) 1842 { 1843 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req; 1844 1845 nvmf_tcp_h2c_term_req_dump(h2c_term_req); 1846 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1847 } 1848 1849 static void 1850 _nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1851 { 1852 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 1853 struct spdk_nvmf_tcp_transport, transport); 1854 1855 switch (pdu->hdr.common.pdu_type) { 1856 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 1857 nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu); 1858 break; 1859 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 1860 nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu); 1861 break; 1862 1863 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 1864 nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu); 1865 break; 1866 1867 default: 1868 /* The code should not go to here */ 1869 SPDK_ERRLOG("ERROR pdu type %d\n", pdu->hdr.common.pdu_type); 1870 break; 1871 } 1872 SLIST_INSERT_HEAD(&tqpair->tcp_pdu_free_queue, pdu, slist); 1873 } 1874 1875 static void 1876 data_crc32_calc_done(void *cb_arg, int status) 1877 { 1878 struct nvme_tcp_pdu *pdu = cb_arg; 1879 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair; 1880 struct spdk_nvmf_tcp_req *tcp_req; 1881 struct spdk_nvme_cpl *rsp; 1882 1883 /* async crc32 calculation is failed and use direct calculation to check */ 1884 if (spdk_unlikely(status)) { 1885 SPDK_ERRLOG("Data digest on tqpair=(%p) with pdu=%p failed to be calculated asynchronously\n", 1886 tqpair, pdu); 1887 pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu); 1888 } 1889 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1890 if (!MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32)) { 1891 SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1892 tcp_req = pdu->req; 1893 assert(tcp_req != NULL); 1894 rsp = &tcp_req->req.rsp->nvme_cpl; 1895 rsp->status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1896 } 1897 _nvmf_tcp_pdu_payload_handle(tqpair, pdu); 1898 } 1899 1900 static void 1901 nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1902 { 1903 int rc = 0; 1904 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1905 tqpair->pdu_in_progress = NULL; 1906 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1907 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 1908 /* check data digest if need */ 1909 if (pdu->ddgst_enable) { 1910 if (!pdu->dif_ctx && tqpair->group && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)) { 1911 rc = spdk_accel_submit_crc32cv(tqpair->group->accel_channel, &pdu->data_digest_crc32, pdu->data_iov, 1912 pdu->data_iovcnt, 0, data_crc32_calc_done, pdu); 1913 if (spdk_likely(rc == 0)) { 1914 return; 1915 } 1916 } else { 1917 pdu->data_digest_crc32 = nvme_tcp_pdu_calc_data_digest(pdu); 1918 } 1919 data_crc32_calc_done(pdu, rc); 1920 } else { 1921 _nvmf_tcp_pdu_payload_handle(tqpair, pdu); 1922 } 1923 } 1924 1925 static void 1926 nvmf_tcp_send_icresp_complete(void *cb_arg) 1927 { 1928 struct spdk_nvmf_tcp_qpair *tqpair = cb_arg; 1929 1930 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_RUNNING); 1931 } 1932 1933 static void 1934 nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport, 1935 struct spdk_nvmf_tcp_qpair *tqpair, 1936 struct nvme_tcp_pdu *pdu) 1937 { 1938 
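	/* Handle the Initialize Connection Request: validate the PDU format
	 * version and HPDA, record the host's header/data digest preferences,
	 * size the socket receive buffer accordingly, and queue an ICResp on the
	 * management PDU. The qpair stays in INITIALIZING until the ICResp send
	 * completes and nvmf_tcp_send_icresp_complete() marks it RUNNING. */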
struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req; 1939 struct nvme_tcp_pdu *rsp_pdu; 1940 struct spdk_nvme_tcp_ic_resp *ic_resp; 1941 uint32_t error_offset = 0; 1942 enum spdk_nvme_tcp_term_req_fes fes; 1943 1944 /* Only PFV 0 is defined currently */ 1945 if (ic_req->pfv != 0) { 1946 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv); 1947 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1948 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv); 1949 goto end; 1950 } 1951 1952 /* This value is 0’s based value in units of dwords should not be larger than SPDK_NVME_TCP_HPDA_MAX */ 1953 if (ic_req->hpda > SPDK_NVME_TCP_HPDA_MAX) { 1954 SPDK_ERRLOG("ICReq HPDA out of range 0 to 31, got %u\n", ic_req->hpda); 1955 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1956 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, hpda); 1957 goto end; 1958 } 1959 1960 /* MAXR2T is 0's based */ 1961 SPDK_DEBUGLOG(nvmf_tcp, "maxr2t =%u\n", (ic_req->maxr2t + 1u)); 1962 1963 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false; 1964 if (!tqpair->host_hdgst_enable) { 1965 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 1966 } 1967 1968 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false; 1969 if (!tqpair->host_ddgst_enable) { 1970 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR; 1971 } 1972 1973 tqpair->recv_buf_size = spdk_max(tqpair->recv_buf_size, MIN_SOCK_PIPE_SIZE); 1974 /* Now that we know whether digests are enabled, properly size the receive buffer */ 1975 if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) { 1976 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1977 tqpair, 1978 tqpair->recv_buf_size); 1979 /* Not fatal. */ 1980 } 1981 1982 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX); 1983 SPDK_DEBUGLOG(nvmf_tcp, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda); 1984 1985 rsp_pdu = tqpair->mgmt_pdu; 1986 1987 ic_resp = &rsp_pdu->hdr.ic_resp; 1988 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP; 1989 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp); 1990 ic_resp->pfv = 0; 1991 ic_resp->cpda = tqpair->cpda; 1992 ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size; 1993 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0; 1994 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 
1 : 0; 1995 1996 SPDK_DEBUGLOG(nvmf_tcp, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable); 1997 SPDK_DEBUGLOG(nvmf_tcp, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable); 1998 1999 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_INITIALIZING); 2000 nvmf_tcp_qpair_write_mgmt_pdu(tqpair, nvmf_tcp_send_icresp_complete, tqpair); 2001 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2002 return; 2003 end: 2004 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2005 } 2006 2007 static void 2008 nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair, 2009 struct spdk_nvmf_tcp_transport *ttransport) 2010 { 2011 struct nvme_tcp_pdu *pdu; 2012 int rc; 2013 uint32_t crc32c, error_offset = 0; 2014 enum spdk_nvme_tcp_term_req_fes fes; 2015 2016 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 2017 pdu = tqpair->pdu_in_progress; 2018 2019 SPDK_DEBUGLOG(nvmf_tcp, "pdu type of tqpair(%p) is %d\n", tqpair, 2020 pdu->hdr.common.pdu_type); 2021 /* check header digest if needed */ 2022 if (pdu->has_hdgst) { 2023 SPDK_DEBUGLOG(nvmf_tcp, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair); 2024 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 2025 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 2026 if (rc == 0) { 2027 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 2028 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 2029 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2030 return; 2031 2032 } 2033 } 2034 2035 switch (pdu->hdr.common.pdu_type) { 2036 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ: 2037 nvmf_tcp_icreq_handle(ttransport, tqpair, pdu); 2038 break; 2039 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 2040 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ); 2041 break; 2042 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 2043 nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu); 2044 break; 2045 2046 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 2047 nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu); 2048 break; 2049 2050 default: 2051 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress->hdr.common.pdu_type); 2052 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2053 error_offset = 1; 2054 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2055 break; 2056 } 2057 } 2058 2059 static void 2060 nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair) 2061 { 2062 struct nvme_tcp_pdu *pdu; 2063 uint32_t error_offset = 0; 2064 enum spdk_nvme_tcp_term_req_fes fes; 2065 uint8_t expected_hlen, pdo; 2066 bool plen_error = false, pdo_error = false; 2067 2068 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 2069 pdu = tqpair->pdu_in_progress; 2070 assert(pdu); 2071 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) { 2072 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 2073 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu); 2074 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 2075 goto err; 2076 } 2077 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req); 2078 if (pdu->hdr.common.plen != expected_hlen) { 2079 plen_error = true; 2080 } 2081 } else { 2082 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 2083 SPDK_ERRLOG("The TCP/IP connection is not negotiated\n"); 2084 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 2085 goto err; 2086 } 2087 2088 switch (pdu->hdr.common.pdu_type) { 2089 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD: 2090 expected_hlen = sizeof(struct 
spdk_nvme_tcp_cmd); 2091 pdo = pdu->hdr.common.pdo; 2092 if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) { 2093 pdo_error = true; 2094 break; 2095 } 2096 2097 if (pdu->hdr.common.plen < expected_hlen) { 2098 plen_error = true; 2099 } 2100 break; 2101 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA: 2102 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr); 2103 pdo = pdu->hdr.common.pdo; 2104 if ((tqpair->cpda != 0) && (pdo % ((tqpair->cpda + 1) << 2) != 0)) { 2105 pdo_error = true; 2106 break; 2107 } 2108 if (pdu->hdr.common.plen < expected_hlen) { 2109 plen_error = true; 2110 } 2111 break; 2112 2113 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 2114 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 2115 if ((pdu->hdr.common.plen <= expected_hlen) || 2116 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 2117 plen_error = true; 2118 } 2119 break; 2120 2121 default: 2122 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type); 2123 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2124 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 2125 goto err; 2126 } 2127 } 2128 2129 if (pdu->hdr.common.hlen != expected_hlen) { 2130 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n", 2131 pdu->hdr.common.pdu_type, 2132 expected_hlen, pdu->hdr.common.hlen, tqpair); 2133 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2134 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 2135 goto err; 2136 } else if (pdo_error) { 2137 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2138 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo); 2139 } else if (plen_error) { 2140 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2141 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 2142 goto err; 2143 } else { 2144 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 2145 nvme_tcp_pdu_calc_psh_len(tqpair->pdu_in_progress, tqpair->host_hdgst_enable); 2146 return; 2147 } 2148 err: 2149 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset); 2150 } 2151 2152 static int 2153 nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) 2154 { 2155 int rc = 0; 2156 struct nvme_tcp_pdu *pdu; 2157 enum nvme_tcp_pdu_recv_state prev_state; 2158 uint32_t data_len; 2159 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 2160 struct spdk_nvmf_tcp_transport, transport); 2161 2162 /* The loop here is to allow for several back-to-back state changes. 
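	 * Each pass consumes at most one piece of the current PDU (common header,
	 * PDU-specific header, then payload) from the socket and re-evaluates the
	 * receive state; the loop exits once the state stops changing or more
	 * data is needed.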
*/ 2163 do { 2164 prev_state = tqpair->recv_state; 2165 SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state); 2166 2167 pdu = tqpair->pdu_in_progress; 2168 assert(pdu || tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2169 switch (tqpair->recv_state) { 2170 /* Wait for the common header */ 2171 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 2172 if (!pdu) { 2173 pdu = SLIST_FIRST(&tqpair->tcp_pdu_free_queue); 2174 if (spdk_unlikely(!pdu)) { 2175 return NVME_TCP_PDU_IN_PROGRESS; 2176 } 2177 SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist); 2178 tqpair->pdu_in_progress = pdu; 2179 } 2180 memset(pdu, 0, offsetof(struct nvme_tcp_pdu, qpair)); 2181 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 2182 /* FALLTHROUGH */ 2183 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 2184 if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 2185 return rc; 2186 } 2187 2188 rc = nvme_tcp_read_data(tqpair->sock, 2189 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 2190 (void *)&pdu->hdr.common + pdu->ch_valid_bytes); 2191 if (rc < 0) { 2192 SPDK_DEBUGLOG(nvmf_tcp, "will disconnect tqpair=%p\n", tqpair); 2193 return NVME_TCP_PDU_FATAL; 2194 } else if (rc > 0) { 2195 pdu->ch_valid_bytes += rc; 2196 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair); 2197 } 2198 2199 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 2200 return NVME_TCP_PDU_IN_PROGRESS; 2201 } 2202 2203 /* The command header of this PDU has now been read from the socket. */ 2204 nvmf_tcp_pdu_ch_handle(tqpair); 2205 break; 2206 /* Wait for the pdu specific header */ 2207 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 2208 rc = nvme_tcp_read_data(tqpair->sock, 2209 pdu->psh_len - pdu->psh_valid_bytes, 2210 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 2211 if (rc < 0) { 2212 return NVME_TCP_PDU_FATAL; 2213 } else if (rc > 0) { 2214 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair); 2215 pdu->psh_valid_bytes += rc; 2216 } 2217 2218 if (pdu->psh_valid_bytes < pdu->psh_len) { 2219 return NVME_TCP_PDU_IN_PROGRESS; 2220 } 2221 2222 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. 
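	 * nvmf_tcp_pdu_psh_handle() verifies the header digest, when present,
	 * and dispatches on the PDU type (ICReq, capsule command, H2C data or
	 * H2C termination request).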
*/ 2223 nvmf_tcp_pdu_psh_handle(tqpair, ttransport); 2224 break; 2225 /* Wait for the req slot */ 2226 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ: 2227 nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu); 2228 break; 2229 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 2230 /* check whether the data is valid, if not we just return */ 2231 if (!pdu->data_len) { 2232 return NVME_TCP_PDU_IN_PROGRESS; 2233 } 2234 2235 data_len = pdu->data_len; 2236 /* data digest */ 2237 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) && 2238 tqpair->host_ddgst_enable)) { 2239 data_len += SPDK_NVME_TCP_DIGEST_LEN; 2240 pdu->ddgst_enable = true; 2241 } 2242 2243 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 2244 if (rc < 0) { 2245 return NVME_TCP_PDU_FATAL; 2246 } 2247 pdu->rw_offset += rc; 2248 2249 if (pdu->rw_offset < data_len) { 2250 return NVME_TCP_PDU_IN_PROGRESS; 2251 } 2252 2253 /* Generate and insert DIF to whole data block received if DIF is enabled */ 2254 if (spdk_unlikely(pdu->dif_ctx != NULL) && 2255 spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 0, data_len, 2256 pdu->dif_ctx) != 0) { 2257 SPDK_ERRLOG("DIF generate failed\n"); 2258 return NVME_TCP_PDU_FATAL; 2259 } 2260 2261 /* All of this PDU has now been read from the socket. */ 2262 nvmf_tcp_pdu_payload_handle(tqpair, pdu); 2263 break; 2264 case NVME_TCP_PDU_RECV_STATE_ERROR: 2265 if (!spdk_sock_is_connected(tqpair->sock)) { 2266 return NVME_TCP_PDU_FATAL; 2267 } 2268 break; 2269 default: 2270 SPDK_ERRLOG("The state(%d) is invalid\n", tqpair->recv_state); 2271 abort(); 2272 break; 2273 } 2274 } while (tqpair->recv_state != prev_state); 2275 2276 return rc; 2277 } 2278 2279 static inline void * 2280 nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list) 2281 { 2282 struct spdk_nvmf_tcp_control_msg *msg; 2283 2284 assert(list); 2285 2286 msg = STAILQ_FIRST(&list->free_msgs); 2287 if (!msg) { 2288 SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n"); 2289 return NULL; 2290 } 2291 STAILQ_REMOVE_HEAD(&list->free_msgs, link); 2292 return msg; 2293 } 2294 2295 static inline void 2296 nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg) 2297 { 2298 struct spdk_nvmf_tcp_control_msg *msg = _msg; 2299 2300 assert(list); 2301 STAILQ_INSERT_HEAD(&list->free_msgs, msg, link); 2302 } 2303 2304 static int 2305 nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, 2306 struct spdk_nvmf_transport *transport, 2307 struct spdk_nvmf_transport_poll_group *group) 2308 { 2309 struct spdk_nvmf_request *req = &tcp_req->req; 2310 struct spdk_nvme_cmd *cmd; 2311 struct spdk_nvme_sgl_descriptor *sgl; 2312 struct spdk_nvmf_tcp_poll_group *tgroup; 2313 enum spdk_nvme_tcp_term_req_fes fes; 2314 struct nvme_tcp_pdu *pdu; 2315 struct spdk_nvmf_tcp_qpair *tqpair; 2316 uint32_t length, error_offset = 0; 2317 2318 cmd = &req->cmd->nvme_cmd; 2319 sgl = &cmd->dptr.sgl1; 2320 2321 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK && 2322 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) { 2323 /* get request length from sgl */ 2324 length = sgl->unkeyed.length; 2325 if (spdk_unlikely(length > transport->opts.max_io_size)) { 2326 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 2327 length, transport->opts.max_io_size); 2328 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED; 2329 goto fatal_err; 2330 } 2331 2332 /* fill request length and populate iovs */ 2333 req->length = length; 2334 2335 SPDK_DEBUGLOG(nvmf_tcp, "Data requested length= 
0x%x\n", length); 2336 2337 if (spdk_unlikely(req->dif_enabled)) { 2338 req->dif.orig_length = length; 2339 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2340 req->dif.elba_length = length; 2341 } 2342 2343 if (nvmf_ctrlr_use_zcopy(req)) { 2344 SPDK_DEBUGLOG(nvmf_tcp, "Using zero-copy to execute request %p\n", tcp_req); 2345 req->data_from_pool = false; 2346 return 0; 2347 } 2348 2349 if (spdk_nvmf_request_get_buffers(req, group, transport, length)) { 2350 /* No available buffers. Queue this request up. */ 2351 SPDK_DEBUGLOG(nvmf_tcp, "No available large data buffers. Queueing request %p\n", 2352 tcp_req); 2353 return 0; 2354 } 2355 2356 /* backward compatible */ 2357 req->data = req->iov[0].iov_base; 2358 2359 SPDK_DEBUGLOG(nvmf_tcp, "Request %p took %d buffer/s from central pool, and data=%p\n", 2360 tcp_req, req->iovcnt, req->data); 2361 2362 return 0; 2363 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 2364 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 2365 uint64_t offset = sgl->address; 2366 uint32_t max_len = transport->opts.in_capsule_data_size; 2367 2368 assert(tcp_req->has_in_capsule_data); 2369 /* Capsule Cmd with In-capsule Data should get data length from pdu header */ 2370 tqpair = tcp_req->pdu->qpair; 2371 /* receiving pdu is not same with the pdu in tcp_req */ 2372 pdu = tqpair->pdu_in_progress; 2373 length = pdu->hdr.common.plen - pdu->psh_len - sizeof(struct spdk_nvme_tcp_common_pdu_hdr); 2374 if (tqpair->host_ddgst_enable) { 2375 length -= SPDK_NVME_TCP_DIGEST_LEN; 2376 } 2377 /* This error is not defined in NVMe/TCP spec, take this error as fatal error */ 2378 if (spdk_unlikely(length != sgl->unkeyed.length)) { 2379 SPDK_ERRLOG("In-Capsule Data length 0x%x is not equal to SGL data length 0x%x\n", 2380 length, sgl->unkeyed.length); 2381 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 2382 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 2383 goto fatal_err; 2384 } 2385 2386 SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 2387 offset, length); 2388 2389 /* The NVMe/TCP transport does not use ICDOFF to control the in-capsule data offset. ICDOFF should be '0' */ 2390 if (spdk_unlikely(offset != 0)) { 2391 /* Not defined fatal error in NVMe/TCP spec, handle this error as a fatal error */ 2392 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " should be ZERO in NVMe/TCP\n", offset); 2393 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 2394 error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.address); 2395 goto fatal_err; 2396 } 2397 2398 if (spdk_unlikely(length > max_len)) { 2399 /* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */ 2400 if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE && 2401 (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) { 2402 2403 /* Get a buffer from dedicated list */ 2404 SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n"); 2405 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2406 assert(tgroup->control_msg_list); 2407 req->data = nvmf_tcp_control_msg_get(tgroup->control_msg_list); 2408 if (!req->data) { 2409 /* No available buffers. Queue this request up. */ 2410 SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. 
Queueing request %p\n", tcp_req); 2411 return 0; 2412 } 2413 } else { 2414 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 2415 length, max_len); 2416 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED; 2417 goto fatal_err; 2418 } 2419 } else { 2420 req->data = tcp_req->buf; 2421 } 2422 2423 req->length = length; 2424 req->data_from_pool = false; 2425 2426 if (spdk_unlikely(req->dif_enabled)) { 2427 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); 2428 req->dif.elba_length = length; 2429 } 2430 2431 req->iov[0].iov_base = req->data; 2432 req->iov[0].iov_len = length; 2433 req->iovcnt = 1; 2434 2435 return 0; 2436 } 2437 /* If we want to handle the problem here, then we can't skip the following data segment. 2438 * Because this function runs before reading data part, now handle all errors as fatal errors. */ 2439 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 2440 sgl->generic.type, sgl->generic.subtype); 2441 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER; 2442 error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.generic); 2443 fatal_err: 2444 nvmf_tcp_send_c2h_term_req(tcp_req->pdu->qpair, tcp_req->pdu, fes, error_offset); 2445 return -1; 2446 } 2447 2448 static inline enum spdk_nvme_media_error_status_code 2449 nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) { 2450 enum spdk_nvme_media_error_status_code result; 2451 2452 switch (err_type) 2453 { 2454 case SPDK_DIF_REFTAG_ERROR: 2455 result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR; 2456 break; 2457 case SPDK_DIF_APPTAG_ERROR: 2458 result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR; 2459 break; 2460 case SPDK_DIF_GUARD_ERROR: 2461 result = SPDK_NVME_SC_GUARD_CHECK_ERROR; 2462 break; 2463 default: 2464 SPDK_UNREACHABLE(); 2465 break; 2466 } 2467 2468 return result; 2469 } 2470 2471 static void 2472 _nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2473 struct spdk_nvmf_tcp_req *tcp_req) 2474 { 2475 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF( 2476 tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport); 2477 struct nvme_tcp_pdu *rsp_pdu; 2478 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 2479 uint32_t plen, pdo, alignment; 2480 int rc; 2481 2482 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2483 2484 rsp_pdu = tcp_req->pdu; 2485 assert(rsp_pdu != NULL); 2486 2487 c2h_data = &rsp_pdu->hdr.c2h_data; 2488 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA; 2489 plen = c2h_data->common.hlen = sizeof(*c2h_data); 2490 2491 if (tqpair->host_hdgst_enable) { 2492 plen += SPDK_NVME_TCP_DIGEST_LEN; 2493 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 2494 } 2495 2496 /* set the psh */ 2497 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid; 2498 c2h_data->datal = tcp_req->req.length - tcp_req->pdu->rw_offset; 2499 c2h_data->datao = tcp_req->pdu->rw_offset; 2500 2501 /* set the padding */ 2502 rsp_pdu->padding_len = 0; 2503 pdo = plen; 2504 if (tqpair->cpda) { 2505 alignment = (tqpair->cpda + 1) << 2; 2506 if (plen % alignment != 0) { 2507 pdo = (plen + alignment) / alignment * alignment; 2508 rsp_pdu->padding_len = pdo - plen; 2509 plen = pdo; 2510 } 2511 } 2512 2513 c2h_data->common.pdo = pdo; 2514 plen += c2h_data->datal; 2515 if (tqpair->host_ddgst_enable) { 2516 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 2517 plen += SPDK_NVME_TCP_DIGEST_LEN; 2518 } 2519 2520 c2h_data->common.plen = plen; 2521 2522 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2523 rsp_pdu->dif_ctx = 
&tcp_req->req.dif.dif_ctx; 2524 } 2525 2526 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2527 c2h_data->datao, c2h_data->datal); 2528 2529 2530 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 2531 /* Need to send the capsule response if response is not all 0 */ 2532 if (ttransport->tcp_opts.c2h_success && 2533 tcp_req->rsp.cdw0 == 0 && tcp_req->rsp.cdw1 == 0) { 2534 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 2535 } 2536 2537 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2538 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl; 2539 struct spdk_dif_error err_blk = {}; 2540 uint32_t mapped_length = 0; 2541 uint32_t available_iovs = SPDK_COUNTOF(rsp_pdu->iov); 2542 uint32_t ddgst_len = 0; 2543 2544 if (tqpair->host_ddgst_enable) { 2545 /* Data digest consumes additional iov entry */ 2546 available_iovs--; 2547 /* plen needs to be updated since nvme_tcp_build_iovs compares expected and actual plen */ 2548 ddgst_len = SPDK_NVME_TCP_DIGEST_LEN; 2549 c2h_data->common.plen -= ddgst_len; 2550 } 2551 /* Temp call to estimate if data can be described by limited number of iovs. 2552 * iov vector will be rebuilt in nvmf_tcp_qpair_write_pdu */ 2553 nvme_tcp_build_iovs(rsp_pdu->iov, available_iovs, rsp_pdu, tqpair->host_hdgst_enable, 2554 false, &mapped_length); 2555 2556 if (mapped_length != c2h_data->common.plen) { 2557 c2h_data->datal = mapped_length - (c2h_data->common.plen - c2h_data->datal); 2558 SPDK_DEBUGLOG(nvmf_tcp, 2559 "Part C2H, data_len %u (of %u), PDU len %u, updated PDU len %u, offset %u\n", 2560 c2h_data->datal, tcp_req->req.length, c2h_data->common.plen, mapped_length, rsp_pdu->rw_offset); 2561 c2h_data->common.plen = mapped_length; 2562 2563 /* Rebuild pdu->data_iov since data length is changed */ 2564 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt, c2h_data->datao, 2565 c2h_data->datal); 2566 2567 c2h_data->common.flags &= ~(SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU | 2568 SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS); 2569 } 2570 2571 c2h_data->common.plen += ddgst_len; 2572 2573 assert(rsp_pdu->rw_offset <= tcp_req->req.length); 2574 2575 rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt, 2576 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk); 2577 if (rc != 0) { 2578 SPDK_ERRLOG("DIF error detected. 
type=%d, offset=%" PRIu32 "\n", 2579 err_blk.err_type, err_blk.err_offset); 2580 rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR; 2581 rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type); 2582 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2583 return; 2584 } 2585 } 2586 2587 rsp_pdu->rw_offset += c2h_data->datal; 2588 nvmf_tcp_qpair_write_req_pdu(tqpair, tcp_req, nvmf_tcp_pdu_c2h_data_complete, tcp_req); 2589 } 2590 2591 static void 2592 nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair, 2593 struct spdk_nvmf_tcp_req *tcp_req) 2594 { 2595 nvmf_tcp_req_pdu_init(tcp_req); 2596 _nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2597 } 2598 2599 static int 2600 request_transfer_out(struct spdk_nvmf_request *req) 2601 { 2602 struct spdk_nvmf_tcp_req *tcp_req; 2603 struct spdk_nvmf_qpair *qpair; 2604 struct spdk_nvmf_tcp_qpair *tqpair; 2605 struct spdk_nvme_cpl *rsp; 2606 2607 SPDK_DEBUGLOG(nvmf_tcp, "enter\n"); 2608 2609 qpair = req->qpair; 2610 rsp = &req->rsp->nvme_cpl; 2611 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 2612 2613 /* Advance our sq_head pointer */ 2614 if (qpair->sq_head == qpair->sq_head_max) { 2615 qpair->sq_head = 0; 2616 } else { 2617 qpair->sq_head++; 2618 } 2619 rsp->sqhd = qpair->sq_head; 2620 2621 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2622 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); 2623 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 2624 nvmf_tcp_send_c2h_data(tqpair, tcp_req); 2625 } else { 2626 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair); 2627 } 2628 2629 return 0; 2630 } 2631 2632 static void 2633 nvmf_tcp_check_fused_ordering(struct spdk_nvmf_tcp_transport *ttransport, 2634 struct spdk_nvmf_tcp_qpair *tqpair, 2635 struct spdk_nvmf_tcp_req *tcp_req) 2636 { 2637 enum spdk_nvme_cmd_fuse last, next; 2638 2639 last = tqpair->fused_first ? tqpair->fused_first->cmd.fuse : SPDK_NVME_CMD_FUSE_NONE; 2640 next = tcp_req->cmd.fuse; 2641 2642 assert(last != SPDK_NVME_CMD_FUSE_SECOND); 2643 2644 if (spdk_likely(last == SPDK_NVME_CMD_FUSE_NONE && next == SPDK_NVME_CMD_FUSE_NONE)) { 2645 return; 2646 } 2647 2648 if (last == SPDK_NVME_CMD_FUSE_FIRST) { 2649 if (next == SPDK_NVME_CMD_FUSE_SECOND) { 2650 /* This is a valid pair of fused commands. Point them at each other 2651 * so they can be submitted consecutively once ready to be executed. 2652 */ 2653 tqpair->fused_first->fused_pair = tcp_req; 2654 tcp_req->fused_pair = tqpair->fused_first; 2655 tqpair->fused_first = NULL; 2656 return; 2657 } else { 2658 /* Mark the last req as failed since it wasn't followed by a SECOND. */ 2659 tqpair->fused_first->fused_failed = true; 2660 2661 /* 2662 * If the last req is in READY_TO_EXECUTE state, then call 2663 * nvmf_tcp_req_process(), otherwise nothing else will kick it. 2664 */ 2665 if (tqpair->fused_first->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) { 2666 nvmf_tcp_req_process(ttransport, tqpair->fused_first); 2667 } 2668 2669 tqpair->fused_first = NULL; 2670 } 2671 } 2672 2673 if (next == SPDK_NVME_CMD_FUSE_FIRST) { 2674 /* Set tqpair->fused_first here so that we know to check that the next request 2675 * is a SECOND (and to fail this one if it isn't). 2676 */ 2677 tqpair->fused_first = tcp_req; 2678 } else if (next == SPDK_NVME_CMD_FUSE_SECOND) { 2679 /* Mark this req failed since it is a SECOND and the last one was not a FIRST. 
*/ 2680 tcp_req->fused_failed = true; 2681 } 2682 } 2683 2684 static bool 2685 nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, 2686 struct spdk_nvmf_tcp_req *tcp_req) 2687 { 2688 struct spdk_nvmf_tcp_qpair *tqpair; 2689 uint32_t plen; 2690 struct nvme_tcp_pdu *pdu; 2691 enum spdk_nvmf_tcp_req_state prev_state; 2692 bool progress = false; 2693 struct spdk_nvmf_transport *transport = &ttransport->transport; 2694 struct spdk_nvmf_transport_poll_group *group; 2695 struct spdk_nvmf_tcp_poll_group *tgroup; 2696 2697 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); 2698 group = &tqpair->group->group; 2699 assert(tcp_req->state != TCP_REQUEST_STATE_FREE); 2700 2701 /* If the qpair is not active, we need to abort the outstanding requests. */ 2702 if (tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { 2703 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) { 2704 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2705 } 2706 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 2707 } 2708 2709 /* The loop here is to allow for several back-to-back state changes. */ 2710 do { 2711 prev_state = tcp_req->state; 2712 2713 SPDK_DEBUGLOG(nvmf_tcp, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state, 2714 tqpair); 2715 2716 switch (tcp_req->state) { 2717 case TCP_REQUEST_STATE_FREE: 2718 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW 2719 * to escape this state. */ 2720 break; 2721 case TCP_REQUEST_STATE_NEW: 2722 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, tqpair); 2723 2724 /* copy the cmd from the receive pdu */ 2725 tcp_req->cmd = tqpair->pdu_in_progress->hdr.capsule_cmd.ccsqe; 2726 2727 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) { 2728 tcp_req->req.dif_enabled = true; 2729 tqpair->pdu_in_progress->dif_ctx = &tcp_req->req.dif.dif_ctx; 2730 } 2731 2732 nvmf_tcp_check_fused_ordering(ttransport, tqpair, tcp_req); 2733 2734 /* The next state transition depends on the data transfer needs of this request. */ 2735 tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req); 2736 2737 if (spdk_unlikely(tcp_req->req.xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) { 2738 tcp_req->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2739 tcp_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; 2740 tcp_req->req.rsp->nvme_cpl.cid = tcp_req->req.cmd->nvme_cmd.cid; 2741 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2742 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2743 SPDK_DEBUGLOG(nvmf_tcp, "Request %p: invalid xfer type (BIDIRECTIONAL)\n", tcp_req); 2744 break; 2745 } 2746 2747 /* If no data to transfer, ready to execute. 
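			 * The receive state is reset to AWAIT_PDU_READY first so the
			 * qpair can start parsing the next PDU while this command is
			 * processed.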
*/ 2748 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) { 2749 /* Reset the tqpair receiving pdu state */ 2750 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2751 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2752 break; 2753 } 2754 2755 pdu = tqpair->pdu_in_progress; 2756 plen = pdu->hdr.common.hlen; 2757 if (tqpair->host_hdgst_enable) { 2758 plen += SPDK_NVME_TCP_DIGEST_LEN; 2759 } 2760 if (pdu->hdr.common.plen != plen) { 2761 tcp_req->has_in_capsule_data = true; 2762 } else { 2763 /* Data is transmitted by C2H PDUs */ 2764 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2765 } 2766 2767 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER); 2768 STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link); 2769 break; 2770 case TCP_REQUEST_STATE_NEED_BUFFER: 2771 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2772 tqpair); 2773 2774 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE); 2775 2776 if (!tcp_req->has_in_capsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) { 2777 SPDK_DEBUGLOG(nvmf_tcp, 2778 "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n", 2779 tcp_req, tqpair); 2780 /* This request needs to wait in line to obtain a buffer */ 2781 break; 2782 } 2783 2784 /* Try to get a data buffer */ 2785 if (nvmf_tcp_req_parse_sgl(tcp_req, transport, group) < 0) { 2786 break; 2787 } 2788 2789 /* Get a zcopy buffer if the request can be serviced through zcopy */ 2790 if (spdk_nvmf_request_using_zcopy(&tcp_req->req)) { 2791 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2792 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 2793 tcp_req->req.length = tcp_req->req.dif.elba_length; 2794 } 2795 2796 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2797 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_START); 2798 spdk_nvmf_request_zcopy_start(&tcp_req->req); 2799 break; 2800 } 2801 2802 if (!tcp_req->req.data) { 2803 SPDK_DEBUGLOG(nvmf_tcp, "No buffer allocated for tcp_req(%p) on tqpair(%p\n)", 2804 tcp_req, tqpair); 2805 /* No buffers available. */ 2806 break; 2807 } 2808 2809 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link); 2810 2811 /* If data is transferring from host to controller, we need to do a transfer from the host. 
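			 * Pool-backed buffers require an explicit R2T; in-capsule data
			 * is already carried by the command PDU, so its payload is read
			 * straight into the request's iovecs instead.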
*/ 2812 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2813 if (tcp_req->req.data_from_pool) { 2814 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2815 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2816 } else { 2817 struct nvme_tcp_pdu *pdu; 2818 2819 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2820 2821 pdu = tqpair->pdu_in_progress; 2822 SPDK_DEBUGLOG(nvmf_tcp, "Not need to send r2t for tcp_req(%p) on tqpair=%p\n", tcp_req, 2823 tqpair); 2824 /* No need to send r2t, contained in the capsuled data */ 2825 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt, 2826 0, tcp_req->req.length); 2827 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 2828 } 2829 break; 2830 } 2831 2832 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE); 2833 break; 2834 case TCP_REQUEST_STATE_AWAITING_ZCOPY_START: 2835 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_START, tqpair->qpair.qid, 0, 2836 (uintptr_t)tcp_req, tqpair); 2837 /* Some external code must kick a request into TCP_REQUEST_STATE_ZCOPY_START_COMPLETED 2838 * to escape this state. */ 2839 break; 2840 case TCP_REQUEST_STATE_ZCOPY_START_COMPLETED: 2841 spdk_trace_record(TRACE_TCP_REQUEST_STATE_ZCOPY_START_COMPLETED, tqpair->qpair.qid, 0, 2842 (uintptr_t)tcp_req, tqpair); 2843 if (spdk_unlikely(spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl))) { 2844 SPDK_DEBUGLOG(nvmf_tcp, "Zero-copy start failed for tcp_req(%p) on tqpair=%p\n", 2845 tcp_req, tqpair); 2846 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2847 break; 2848 } 2849 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 2850 SPDK_DEBUGLOG(nvmf_tcp, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair); 2851 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req); 2852 } else { 2853 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 2854 } 2855 break; 2856 case TCP_REQUEST_STATE_AWAITING_R2T_ACK: 2857 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2858 tqpair); 2859 /* The R2T completion or the h2c data incoming will kick it out of this state. */ 2860 break; 2861 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 2862 2863 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, tqpair->qpair.qid, 0, 2864 (uintptr_t)tcp_req, tqpair); 2865 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE 2866 * to escape this state. */ 2867 break; 2868 case TCP_REQUEST_STATE_READY_TO_EXECUTE: 2869 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, tqpair->qpair.qid, 0, 2870 (uintptr_t)tcp_req, tqpair); 2871 2872 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2873 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length); 2874 tcp_req->req.length = tcp_req->req.dif.elba_length; 2875 } 2876 2877 if (tcp_req->cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) { 2878 if (tcp_req->fused_failed) { 2879 /* This request failed FUSED semantics. Fail it immediately, without 2880 * even sending it to the target layer. 
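				 * The completion is posted with
				 * SPDK_NVME_SC_ABORTED_MISSING_FUSED so the host can tell
				 * that the fused pair was broken.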
2881 */ 2882 tcp_req->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2883 tcp_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED; 2884 tcp_req->req.rsp->nvme_cpl.cid = tcp_req->req.cmd->nvme_cmd.cid; 2885 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2886 break; 2887 } 2888 2889 if (tcp_req->fused_pair == NULL || 2890 tcp_req->fused_pair->state != TCP_REQUEST_STATE_READY_TO_EXECUTE) { 2891 /* This request is ready to execute, but either we don't know yet if it's 2892 * valid - i.e. this is a FIRST but we haven't received the next request yet), 2893 * or the other request of this fused pair isn't ready to execute. So 2894 * break here and this request will get processed later either when the 2895 * other request is ready or we find that this request isn't valid. 2896 */ 2897 break; 2898 } 2899 } 2900 2901 if (!spdk_nvmf_request_using_zcopy(&tcp_req->req)) { 2902 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING); 2903 /* If we get to this point, and this request is a fused command, we know that 2904 * it is part of a valid sequence (FIRST followed by a SECOND) and that both 2905 * requests are READY_TO_EXECUTE. So call spdk_nvmf_request_exec() both on this 2906 * request, and the other request of the fused pair, in the correct order. 2907 * Also clear the ->fused_pair pointers on both requests, since after this point 2908 * we no longer need to maintain the relationship between these two requests. 2909 */ 2910 if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_SECOND) { 2911 assert(tcp_req->fused_pair != NULL); 2912 assert(tcp_req->fused_pair->fused_pair == tcp_req); 2913 nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING); 2914 spdk_nvmf_request_exec(&tcp_req->fused_pair->req); 2915 tcp_req->fused_pair->fused_pair = NULL; 2916 tcp_req->fused_pair = NULL; 2917 } 2918 spdk_nvmf_request_exec(&tcp_req->req); 2919 if (tcp_req->cmd.fuse == SPDK_NVME_CMD_FUSE_FIRST) { 2920 assert(tcp_req->fused_pair != NULL); 2921 assert(tcp_req->fused_pair->fused_pair == tcp_req); 2922 nvmf_tcp_req_set_state(tcp_req->fused_pair, TCP_REQUEST_STATE_EXECUTING); 2923 spdk_nvmf_request_exec(&tcp_req->fused_pair->req); 2924 tcp_req->fused_pair->fused_pair = NULL; 2925 tcp_req->fused_pair = NULL; 2926 } 2927 } else { 2928 /* For zero-copy, only requests with data coming from host to the 2929 * controller can end up here. */ 2930 assert(tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER); 2931 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT); 2932 spdk_nvmf_request_zcopy_end(&tcp_req->req, true); 2933 } 2934 2935 break; 2936 case TCP_REQUEST_STATE_EXECUTING: 2937 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2938 tqpair); 2939 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 2940 * to escape this state. */ 2941 break; 2942 case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT: 2943 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_COMMIT, tqpair->qpair.qid, 0, 2944 (uintptr_t)tcp_req, tqpair); 2945 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED 2946 * to escape this state. 
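			 * For this transport that transition happens in
			 * nvmf_tcp_req_complete(), invoked once the zero-copy commit
			 * started by spdk_nvmf_request_zcopy_end() finishes.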
*/ 2947 break; 2948 case TCP_REQUEST_STATE_EXECUTED: 2949 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2950 tqpair); 2951 2952 if (spdk_unlikely(tcp_req->req.dif_enabled)) { 2953 tcp_req->req.length = tcp_req->req.dif.orig_length; 2954 } 2955 2956 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE); 2957 break; 2958 case TCP_REQUEST_STATE_READY_TO_COMPLETE: 2959 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, tqpair->qpair.qid, 0, 2960 (uintptr_t)tcp_req, tqpair); 2961 if (request_transfer_out(&tcp_req->req) != 0) { 2962 assert(0); /* No good way to handle this currently */ 2963 } 2964 break; 2965 case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: 2966 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, tqpair->qpair.qid, 0, 2967 (uintptr_t)tcp_req, tqpair); 2968 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 2969 * to escape this state. */ 2970 break; 2971 case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE: 2972 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_ZCOPY_RELEASE, tqpair->qpair.qid, 0, 2973 (uintptr_t)tcp_req, tqpair); 2974 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED 2975 * to escape this state. */ 2976 break; 2977 case TCP_REQUEST_STATE_COMPLETED: 2978 spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, tqpair->qpair.qid, 0, (uintptr_t)tcp_req, 2979 tqpair); 2980 /* If there's an outstanding PDU sent to the host, the request is completed 2981 * due to the qpair being disconnected. We must delay the completion until 2982 * that write is done to avoid freeing the request twice. */ 2983 if (spdk_unlikely(tcp_req->pdu_in_use)) { 2984 SPDK_DEBUGLOG(nvmf_tcp, "Delaying completion due to outstanding " 2985 "write on req=%p\n", tcp_req); 2986 /* This can only happen for zcopy requests */ 2987 assert(spdk_nvmf_request_using_zcopy(&tcp_req->req)); 2988 assert(tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE); 2989 break; 2990 } 2991 2992 if (tcp_req->req.data_from_pool) { 2993 spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport); 2994 } else if (spdk_unlikely(tcp_req->has_in_capsule_data && 2995 (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC || 2996 tqpair->qpair.qid == 0) && tcp_req->req.length > transport->opts.in_capsule_data_size)) { 2997 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 2998 assert(tgroup->control_msg_list); 2999 SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n"); 3000 nvmf_tcp_control_msg_put(tgroup->control_msg_list, tcp_req->req.data); 3001 } else if (tcp_req->req.zcopy_bdev_io != NULL) { 3002 /* If the request has an unreleased zcopy bdev_io, it's either a 3003 * read, a failed write, or the qpair is being disconnected */ 3004 assert(spdk_nvmf_request_using_zcopy(&tcp_req->req)); 3005 assert(tcp_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST || 3006 spdk_nvme_cpl_is_error(&tcp_req->req.rsp->nvme_cpl) || 3007 tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE); 3008 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE); 3009 spdk_nvmf_request_zcopy_end(&tcp_req->req, false); 3010 break; 3011 } 3012 tcp_req->req.length = 0; 3013 tcp_req->req.iovcnt = 0; 3014 tcp_req->req.data = NULL; 3015 tcp_req->fused_failed = false; 3016 if (tcp_req->fused_pair) { 3017 /* This req was part of a valid fused pair, but failed before it got to 3018 * READ_TO_EXECUTE state. 
This means we need to fail the other request 3019 * in the pair, because it is no longer part of a valid pair. If the pair 3020 * already reached READY_TO_EXECUTE state, we need to kick it. 3021 */ 3022 tcp_req->fused_pair->fused_failed = true; 3023 if (tcp_req->fused_pair->state == TCP_REQUEST_STATE_READY_TO_EXECUTE) { 3024 nvmf_tcp_req_process(ttransport, tcp_req->fused_pair); 3025 } 3026 tcp_req->fused_pair = NULL; 3027 } 3028 3029 nvmf_tcp_req_put(tqpair, tcp_req); 3030 break; 3031 case TCP_REQUEST_NUM_STATES: 3032 default: 3033 assert(0); 3034 break; 3035 } 3036 3037 if (tcp_req->state != prev_state) { 3038 progress = true; 3039 } 3040 } while (tcp_req->state != prev_state); 3041 3042 return progress; 3043 } 3044 3045 static void 3046 nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) 3047 { 3048 struct spdk_nvmf_tcp_qpair *tqpair = arg; 3049 int rc; 3050 3051 assert(tqpair != NULL); 3052 rc = nvmf_tcp_sock_process(tqpair); 3053 3054 /* If there was a new socket error, disconnect */ 3055 if (rc < 0) { 3056 nvmf_tcp_qpair_disconnect(tqpair); 3057 } 3058 } 3059 3060 static int 3061 nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 3062 struct spdk_nvmf_qpair *qpair) 3063 { 3064 struct spdk_nvmf_tcp_poll_group *tgroup; 3065 struct spdk_nvmf_tcp_qpair *tqpair; 3066 int rc; 3067 3068 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 3069 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3070 3071 rc = nvmf_tcp_qpair_sock_init(tqpair); 3072 if (rc != 0) { 3073 SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair); 3074 return -1; 3075 } 3076 3077 rc = nvmf_tcp_qpair_init(&tqpair->qpair); 3078 if (rc < 0) { 3079 SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair); 3080 return -1; 3081 } 3082 3083 rc = nvmf_tcp_qpair_init_mem_resource(tqpair); 3084 if (rc < 0) { 3085 SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair); 3086 return -1; 3087 } 3088 3089 rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock, 3090 nvmf_tcp_sock_cb, tqpair); 3091 if (rc != 0) { 3092 SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n", 3093 spdk_strerror(errno), errno); 3094 return -1; 3095 } 3096 3097 tqpair->group = tgroup; 3098 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_INVALID); 3099 TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link); 3100 3101 return 0; 3102 } 3103 3104 static int 3105 nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 3106 struct spdk_nvmf_qpair *qpair) 3107 { 3108 struct spdk_nvmf_tcp_poll_group *tgroup; 3109 struct spdk_nvmf_tcp_qpair *tqpair; 3110 int rc; 3111 3112 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 3113 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3114 3115 assert(tqpair->group == tgroup); 3116 3117 SPDK_DEBUGLOG(nvmf_tcp, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup); 3118 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) { 3119 TAILQ_REMOVE(&tgroup->await_req, tqpair, link); 3120 } else { 3121 TAILQ_REMOVE(&tgroup->qpairs, tqpair, link); 3122 } 3123 3124 rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock); 3125 if (rc != 0) { 3126 SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n", 3127 spdk_strerror(errno), errno); 3128 } 3129 3130 return rc; 3131 } 3132 3133 static int 3134 nvmf_tcp_req_complete(struct spdk_nvmf_request *req) 3135 { 3136 struct spdk_nvmf_tcp_transport *ttransport; 3137 struct spdk_nvmf_tcp_req 
*tcp_req; 3138 3139 ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport); 3140 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 3141 3142 switch (tcp_req->state) { 3143 case TCP_REQUEST_STATE_EXECUTING: 3144 case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT: 3145 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED); 3146 break; 3147 case TCP_REQUEST_STATE_AWAITING_ZCOPY_START: 3148 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_ZCOPY_START_COMPLETED); 3149 break; 3150 case TCP_REQUEST_STATE_AWAITING_ZCOPY_RELEASE: 3151 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED); 3152 break; 3153 default: 3154 assert(0 && "Unexpected request state"); 3155 break; 3156 } 3157 3158 nvmf_tcp_req_process(ttransport, tcp_req); 3159 3160 return 0; 3161 } 3162 3163 static void 3164 nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair, 3165 spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg) 3166 { 3167 struct spdk_nvmf_tcp_qpair *tqpair; 3168 3169 SPDK_DEBUGLOG(nvmf_tcp, "Qpair: %p\n", qpair); 3170 3171 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3172 3173 assert(tqpair->fini_cb_fn == NULL); 3174 tqpair->fini_cb_fn = cb_fn; 3175 tqpair->fini_cb_arg = cb_arg; 3176 3177 nvmf_tcp_qpair_set_state(tqpair, NVME_TCP_QPAIR_STATE_EXITED); 3178 nvmf_tcp_qpair_destroy(tqpair); 3179 } 3180 3181 static int 3182 nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 3183 { 3184 struct spdk_nvmf_tcp_poll_group *tgroup; 3185 int rc; 3186 struct spdk_nvmf_request *req, *req_tmp; 3187 struct spdk_nvmf_tcp_req *tcp_req; 3188 struct spdk_nvmf_tcp_qpair *tqpair, *tqpair_tmp; 3189 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport, 3190 struct spdk_nvmf_tcp_transport, transport); 3191 3192 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); 3193 3194 if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs) && TAILQ_EMPTY(&tgroup->await_req))) { 3195 return 0; 3196 } 3197 3198 STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) { 3199 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req); 3200 if (nvmf_tcp_req_process(ttransport, tcp_req) == false) { 3201 break; 3202 } 3203 } 3204 3205 rc = spdk_sock_group_poll(tgroup->sock_group); 3206 if (rc < 0) { 3207 SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group); 3208 } 3209 3210 TAILQ_FOREACH_SAFE(tqpair, &tgroup->await_req, link, tqpair_tmp) { 3211 nvmf_tcp_sock_process(tqpair); 3212 } 3213 3214 return rc; 3215 } 3216 3217 static int 3218 nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair, 3219 struct spdk_nvme_transport_id *trid, bool peer) 3220 { 3221 struct spdk_nvmf_tcp_qpair *tqpair; 3222 uint16_t port; 3223 3224 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3225 spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP); 3226 3227 if (peer) { 3228 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr); 3229 port = tqpair->initiator_port; 3230 } else { 3231 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr); 3232 port = tqpair->target_port; 3233 } 3234 3235 if (spdk_sock_is_ipv4(tqpair->sock)) { 3236 trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; 3237 } else if (spdk_sock_is_ipv6(tqpair->sock)) { 3238 trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; 3239 } else { 3240 return -1; 3241 } 3242 3243 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port); 3244 return 0; 3245 } 3246 3247 static int 3248 
nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 3249 struct spdk_nvme_transport_id *trid) 3250 { 3251 return nvmf_tcp_qpair_get_trid(qpair, trid, 0); 3252 } 3253 3254 static int 3255 nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 3256 struct spdk_nvme_transport_id *trid) 3257 { 3258 return nvmf_tcp_qpair_get_trid(qpair, trid, 1); 3259 } 3260 3261 static int 3262 nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 3263 struct spdk_nvme_transport_id *trid) 3264 { 3265 return nvmf_tcp_qpair_get_trid(qpair, trid, 0); 3266 } 3267 3268 static void 3269 nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req, 3270 struct spdk_nvmf_tcp_req *tcp_req_to_abort) 3271 { 3272 tcp_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 3273 tcp_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; 3274 tcp_req_to_abort->req.rsp->nvme_cpl.cid = tcp_req_to_abort->req.cmd->nvme_cmd.cid; 3275 3276 nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE); 3277 3278 req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */ 3279 } 3280 3281 static int 3282 _nvmf_tcp_qpair_abort_request(void *ctx) 3283 { 3284 struct spdk_nvmf_request *req = ctx; 3285 struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort, 3286 struct spdk_nvmf_tcp_req, req); 3287 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair, 3288 struct spdk_nvmf_tcp_qpair, qpair); 3289 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, 3290 struct spdk_nvmf_tcp_transport, transport); 3291 int rc; 3292 3293 spdk_poller_unregister(&req->poller); 3294 3295 switch (tcp_req_to_abort->state) { 3296 case TCP_REQUEST_STATE_EXECUTING: 3297 case TCP_REQUEST_STATE_AWAITING_ZCOPY_START: 3298 case TCP_REQUEST_STATE_AWAITING_ZCOPY_COMMIT: 3299 rc = nvmf_ctrlr_abort_request(req); 3300 if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) { 3301 return SPDK_POLLER_BUSY; 3302 } 3303 break; 3304 3305 case TCP_REQUEST_STATE_NEED_BUFFER: 3306 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, 3307 &tcp_req_to_abort->req, spdk_nvmf_request, buf_link); 3308 3309 nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort); 3310 nvmf_tcp_req_process(ttransport, tcp_req_to_abort); 3311 break; 3312 3313 case TCP_REQUEST_STATE_AWAITING_R2T_ACK: 3314 case TCP_REQUEST_STATE_ZCOPY_START_COMPLETED: 3315 nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort); 3316 break; 3317 3318 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 3319 if (spdk_get_ticks() < req->timeout_tsc) { 3320 req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0); 3321 return SPDK_POLLER_BUSY; 3322 } 3323 break; 3324 3325 default: 3326 break; 3327 } 3328 3329 spdk_nvmf_request_complete(req); 3330 return SPDK_POLLER_BUSY; 3331 } 3332 3333 static void 3334 nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair, 3335 struct spdk_nvmf_request *req) 3336 { 3337 struct spdk_nvmf_tcp_qpair *tqpair; 3338 struct spdk_nvmf_tcp_transport *ttransport; 3339 struct spdk_nvmf_transport *transport; 3340 uint16_t cid; 3341 uint32_t i; 3342 struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL; 3343 3344 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair); 3345 ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport); 3346 transport = &ttransport->transport; 3347 3348 cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid; 3349 3350 for (i = 0; i < tqpair->resource_count; i++) 
{ 3351 if (tqpair->reqs[i].state != TCP_REQUEST_STATE_FREE && 3352 tqpair->reqs[i].req.cmd->nvme_cmd.cid == cid) { 3353 tcp_req_to_abort = &tqpair->reqs[i]; 3354 break; 3355 } 3356 } 3357 3358 spdk_trace_record(TRACE_TCP_QP_ABORT_REQ, qpair->qid, 0, (uintptr_t)req, tqpair); 3359 3360 if (tcp_req_to_abort == NULL) { 3361 spdk_nvmf_request_complete(req); 3362 return; 3363 } 3364 3365 req->req_to_abort = &tcp_req_to_abort->req; 3366 req->timeout_tsc = spdk_get_ticks() + 3367 transport->opts.abort_timeout_sec * spdk_get_ticks_hz(); 3368 req->poller = NULL; 3369 3370 _nvmf_tcp_qpair_abort_request(req); 3371 } 3372 3373 static void 3374 nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts) 3375 { 3376 opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_IO_QUEUE_DEPTH; 3377 opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR; 3378 opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE; 3379 opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE; 3380 opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE; 3381 opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_MAX_ADMIN_QUEUE_DEPTH; 3382 opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS; 3383 opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE; 3384 opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP; 3385 opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC; 3386 opts->transport_specific = NULL; 3387 } 3388 3389 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = { 3390 .name = "TCP", 3391 .type = SPDK_NVME_TRANSPORT_TCP, 3392 .opts_init = nvmf_tcp_opts_init, 3393 .create = nvmf_tcp_create, 3394 .dump_opts = nvmf_tcp_dump_opts, 3395 .destroy = nvmf_tcp_destroy, 3396 3397 .listen = nvmf_tcp_listen, 3398 .stop_listen = nvmf_tcp_stop_listen, 3399 3400 .listener_discover = nvmf_tcp_discover, 3401 3402 .poll_group_create = nvmf_tcp_poll_group_create, 3403 .get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group, 3404 .poll_group_destroy = nvmf_tcp_poll_group_destroy, 3405 .poll_group_add = nvmf_tcp_poll_group_add, 3406 .poll_group_remove = nvmf_tcp_poll_group_remove, 3407 .poll_group_poll = nvmf_tcp_poll_group_poll, 3408 3409 .req_free = nvmf_tcp_req_free, 3410 .req_complete = nvmf_tcp_req_complete, 3411 3412 .qpair_fini = nvmf_tcp_close_qpair, 3413 .qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid, 3414 .qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid, 3415 .qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid, 3416 .qpair_abort_request = nvmf_tcp_qpair_abort_request, 3417 }; 3418 3419 SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp); 3420 SPDK_LOG_REGISTER_COMPONENT(nvmf_tcp) 3421