1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. All rights reserved. 3 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 /* 8 * NVMe/TCP transport 9 */ 10 11 #include "nvme_internal.h" 12 13 #include "spdk/endian.h" 14 #include "spdk/likely.h" 15 #include "spdk/string.h" 16 #include "spdk/stdinc.h" 17 #include "spdk/crc32.h" 18 #include "spdk/endian.h" 19 #include "spdk/assert.h" 20 #include "spdk/string.h" 21 #include "spdk/trace.h" 22 #include "spdk/util.h" 23 #include "spdk/nvmf.h" 24 25 #include "spdk_internal/nvme_tcp.h" 26 #include "spdk_internal/trace_defs.h" 27 28 #define NVME_TCP_RW_BUFFER_SIZE 131072 29 30 /* For async connect workloads, allow more time since we are more likely 31 * to be processing lots ICREQs at once. 32 */ 33 #define ICREQ_TIMEOUT_SYNC 2 /* in seconds */ 34 #define ICREQ_TIMEOUT_ASYNC 10 /* in seconds */ 35 36 #define NVME_TCP_HPDA_DEFAULT 0 37 #define NVME_TCP_MAX_R2T_DEFAULT 1 38 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 39 40 /* 41 * Maximum value of transport_ack_timeout used by TCP controller 42 */ 43 #define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31 44 45 46 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 47 struct nvme_tcp_ctrlr { 48 struct spdk_nvme_ctrlr ctrlr; 49 char psk_identity[NVMF_PSK_IDENTITY_LEN]; 50 uint8_t psk[SPDK_TLS_PSK_MAX_LEN]; 51 int psk_size; 52 char *tls_cipher_suite; 53 }; 54 55 struct nvme_tcp_poll_group { 56 struct spdk_nvme_transport_poll_group group; 57 struct spdk_sock_group *sock_group; 58 uint32_t completions_per_qpair; 59 int64_t num_completions; 60 61 TAILQ_HEAD(, nvme_tcp_qpair) needs_poll; 62 struct spdk_nvme_tcp_stat stats; 63 }; 64 65 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 66 struct nvme_tcp_qpair { 67 struct spdk_nvme_qpair qpair; 68 struct spdk_sock *sock; 69 70 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 71 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 72 73 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 74 struct nvme_tcp_pdu *recv_pdu; 75 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 76 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 77 enum nvme_tcp_pdu_recv_state recv_state; 78 struct nvme_tcp_req *tcp_reqs; 79 struct spdk_nvme_tcp_stat *stats; 80 81 uint16_t num_entries; 82 uint16_t async_complete; 83 84 struct { 85 uint16_t host_hdgst_enable: 1; 86 uint16_t host_ddgst_enable: 1; 87 uint16_t icreq_send_ack: 1; 88 uint16_t in_connect_poll: 1; 89 uint16_t reserved: 12; 90 } flags; 91 92 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 93 uint32_t maxh2cdata; 94 95 uint32_t maxr2t; 96 97 /* 0 based value, which is used to guide the padding */ 98 uint8_t cpda; 99 100 enum nvme_tcp_qpair_state state; 101 102 TAILQ_ENTRY(nvme_tcp_qpair) link; 103 bool needs_poll; 104 105 uint64_t icreq_timeout_tsc; 106 107 bool shared_stats; 108 }; 109 110 enum nvme_tcp_req_state { 111 NVME_TCP_REQ_FREE, 112 NVME_TCP_REQ_ACTIVE, 113 NVME_TCP_REQ_ACTIVE_R2T, 114 }; 115 116 struct nvme_tcp_req { 117 struct nvme_request *req; 118 enum nvme_tcp_req_state state; 119 uint16_t cid; 120 uint16_t ttag; 121 uint32_t datao; 122 uint32_t expected_datao; 123 uint32_t r2tl_remain; 124 uint32_t active_r2ts; 125 /* Used to hold a value received from subsequent R2T while we are still 126 * waiting for H2C complete */ 127 uint16_t ttag_r2t_next; 128 bool in_capsule_data; 129 /* It is used to track whether the req can be safely freed */ 
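/* Illustrative sketch (added commentary) of how the ordering bits below are used: a request is
 * retired only once the kernel has released the send buffer for the command PDU (send_ack) and
 * the target's response or final C2H data has arrived (data_recv), roughly:
 *
 *   if (tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv) {
 *       nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true);
 *   }
 *
 * nvme_tcp_req_complete_safe() further down implements the actual check. */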
130 union { 131 uint8_t raw; 132 struct { 133 /* The last send operation completed - kernel released send buffer */ 134 uint8_t send_ack : 1; 135 /* Data transfer completed - target send resp or last data bit */ 136 uint8_t data_recv : 1; 137 /* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification 138 * from kernel) to send H2C */ 139 uint8_t h2c_send_waiting_ack : 1; 140 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 141 * Rare case, actual when dealing with target that can send several R2T requests. 142 * SPDK TCP target sends 1 R2T for the whole data buffer */ 143 uint8_t r2t_waiting_h2c_complete : 1; 144 /* Accel operation is in progress */ 145 uint8_t in_progress_accel : 1; 146 uint8_t reserved : 3; 147 } bits; 148 } ordering; 149 struct nvme_tcp_pdu *pdu; 150 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 151 uint32_t iovcnt; 152 /* Used to hold a value received from subsequent R2T while we are still 153 * waiting for H2C ack */ 154 uint32_t r2tl_remain_next; 155 struct nvme_tcp_qpair *tqpair; 156 TAILQ_ENTRY(nvme_tcp_req) link; 157 struct spdk_nvme_cpl rsp; 158 }; 159 160 static struct spdk_nvme_tcp_stat g_dummy_stats = {}; 161 162 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 163 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 164 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 165 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 166 static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair, 167 struct spdk_nvme_cpl *rsp, bool print_on_error); 168 169 static inline struct nvme_tcp_qpair * 170 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 171 { 172 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 173 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 174 } 175 176 static inline struct nvme_tcp_poll_group * 177 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 178 { 179 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 180 } 181 182 static inline struct nvme_tcp_ctrlr * 183 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 184 { 185 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 186 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 187 } 188 189 static struct nvme_tcp_req * 190 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 191 { 192 struct nvme_tcp_req *tcp_req; 193 194 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 195 if (!tcp_req) { 196 return NULL; 197 } 198 199 assert(tcp_req->state == NVME_TCP_REQ_FREE); 200 tcp_req->state = NVME_TCP_REQ_ACTIVE; 201 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 202 tcp_req->datao = 0; 203 tcp_req->expected_datao = 0; 204 tcp_req->req = NULL; 205 tcp_req->in_capsule_data = false; 206 tcp_req->r2tl_remain = 0; 207 tcp_req->r2tl_remain_next = 0; 208 tcp_req->active_r2ts = 0; 209 tcp_req->iovcnt = 0; 210 tcp_req->ordering.raw = 0; 211 memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu)); 212 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 213 214 return tcp_req; 215 } 216 217 static void 218 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 219 { 220 assert(tcp_req->state != NVME_TCP_REQ_FREE); 221 tcp_req->state = NVME_TCP_REQ_FREE; 222 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 223 } 224 225 static inline void 226 nvme_tcp_accel_submit_crc32c(struct nvme_tcp_poll_group *tgroup, 
struct nvme_tcp_req *treq, 227 uint32_t *dst, struct iovec *iovs, uint32_t iovcnt, uint32_t seed, 228 spdk_nvme_accel_completion_cb cb_fn, void *cb_arg) 229 { 230 struct spdk_nvme_poll_group *pg = tgroup->group.group; 231 232 treq->ordering.bits.in_progress_accel = 1; 233 pg->accel_fn_table.submit_accel_crc32c(pg->ctx, dst, iovs, iovcnt, seed, cb_fn, cb_arg); 234 } 235 236 static inline void 237 nvme_tcp_accel_finish_sequence(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq, 238 void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg) 239 { 240 struct spdk_nvme_poll_group *pg = tgroup->group.group; 241 242 treq->ordering.bits.in_progress_accel = 1; 243 pg->accel_fn_table.finish_sequence(seq, cb_fn, cb_arg); 244 } 245 246 static inline void 247 nvme_tcp_accel_reverse_sequence(struct nvme_tcp_poll_group *tgroup, void *seq) 248 { 249 struct spdk_nvme_poll_group *pg = tgroup->group.group; 250 251 pg->accel_fn_table.reverse_sequence(seq); 252 } 253 254 static inline int 255 nvme_tcp_accel_append_crc32c(struct nvme_tcp_poll_group *tgroup, void **seq, uint32_t *dst, 256 struct iovec *iovs, uint32_t iovcnt, uint32_t seed, 257 spdk_nvme_accel_step_cb cb_fn, void *cb_arg) 258 { 259 struct spdk_nvme_poll_group *pg = tgroup->group.group; 260 261 return pg->accel_fn_table.append_crc32c(pg->ctx, seq, dst, iovs, iovcnt, NULL, NULL, 262 seed, cb_fn, cb_arg); 263 } 264 265 static void 266 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 267 { 268 free(tqpair->tcp_reqs); 269 tqpair->tcp_reqs = NULL; 270 271 spdk_free(tqpair->send_pdus); 272 tqpair->send_pdus = NULL; 273 } 274 275 static int 276 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 277 { 278 uint16_t i; 279 struct nvme_tcp_req *tcp_req; 280 281 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 282 if (tqpair->tcp_reqs == NULL) { 283 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 284 goto fail; 285 } 286 287 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 288 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 289 0x1000, NULL, 290 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 291 292 if (tqpair->send_pdus == NULL) { 293 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 294 goto fail; 295 } 296 297 TAILQ_INIT(&tqpair->send_queue); 298 TAILQ_INIT(&tqpair->free_reqs); 299 TAILQ_INIT(&tqpair->outstanding_reqs); 300 for (i = 0; i < tqpair->num_entries; i++) { 301 tcp_req = &tqpair->tcp_reqs[i]; 302 tcp_req->cid = i; 303 tcp_req->tqpair = tqpair; 304 tcp_req->pdu = &tqpair->send_pdus[i]; 305 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 306 } 307 308 tqpair->send_pdu = &tqpair->send_pdus[i]; 309 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 310 311 return 0; 312 fail: 313 nvme_tcp_free_reqs(tqpair); 314 return -ENOMEM; 315 } 316 317 static inline void 318 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 319 enum nvme_tcp_pdu_recv_state state) 320 { 321 if (tqpair->recv_state == state) { 322 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 323 tqpair, state); 324 return; 325 } 326 327 if (state == NVME_TCP_PDU_RECV_STATE_ERROR) { 328 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 329 } 330 331 tqpair->recv_state = state; 332 } 333 334 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 335 336 static void 337 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 338 { 339 struct 
nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 340 struct nvme_tcp_pdu *pdu; 341 int rc; 342 struct nvme_tcp_poll_group *group; 343 344 if (tqpair->needs_poll) { 345 group = nvme_tcp_poll_group(qpair->poll_group); 346 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 347 tqpair->needs_poll = false; 348 } 349 350 rc = spdk_sock_close(&tqpair->sock); 351 352 if (tqpair->sock != NULL) { 353 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 354 /* Set it to NULL manually */ 355 tqpair->sock = NULL; 356 } 357 358 /* clear the send_queue */ 359 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 360 pdu = TAILQ_FIRST(&tqpair->send_queue); 361 /* Remove the pdu from the send_queue to prevent the wrong sending out 362 * in the next round connection 363 */ 364 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 365 } 366 367 nvme_tcp_qpair_abort_reqs(qpair, 0); 368 369 /* If the qpair is marked as asynchronous, let it go through the process_completions() to 370 * let any outstanding requests (e.g. those with outstanding accel operations) complete. 371 * Otherwise, there's no way of waiting for them, so tqpair->outstanding_reqs has to be 372 * empty. 373 */ 374 if (qpair->async) { 375 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 376 } else { 377 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 378 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 379 } 380 } 381 382 static int 383 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 384 { 385 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 386 387 assert(qpair != NULL); 388 nvme_tcp_qpair_abort_reqs(qpair, 0); 389 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 390 391 nvme_qpair_deinit(qpair); 392 nvme_tcp_free_reqs(tqpair); 393 if (!tqpair->shared_stats) { 394 free(tqpair->stats); 395 } 396 free(tqpair); 397 398 return 0; 399 } 400 401 static int 402 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 403 { 404 return 0; 405 } 406 407 static int 408 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 409 { 410 struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr); 411 412 if (ctrlr->adminq) { 413 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 414 } 415 416 nvme_ctrlr_destruct_finish(ctrlr); 417 418 free(tctrlr); 419 420 return 0; 421 } 422 423 static void 424 pdu_write_done(void *cb_arg, int err) 425 { 426 struct nvme_tcp_pdu *pdu = cb_arg; 427 struct nvme_tcp_qpair *tqpair = pdu->qpair; 428 struct nvme_tcp_poll_group *pgroup; 429 430 /* If there are queued requests, we assume they are queued because they are waiting 431 * for resources to be released. Those resources are almost certainly released in 432 * response to a PDU completing here. However, to attempt to make forward progress 433 * the qpair needs to be polled and we can't rely on another network event to make 434 * that happen. Add it to a list of qpairs to poll regardless of network activity 435 * here. 436 * Besides, when tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or 437 * NVME_TCP_QPAIR_STATE_INITIALIZING, need to add it to needs_poll list too to make 438 * forward progress in case that the resources are released after icreq's or CONNECT's 439 * resp is processed. 
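 * (The needs_poll list is meant to be drained from the poll group's process_completions path,
 * so a qpair added here gets polled on the next call even if its socket stays quiet.)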
*/ 440 if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) || 441 tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL || 442 tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 443 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 444 445 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 446 tqpair->needs_poll = true; 447 } 448 449 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 450 451 if (err != 0) { 452 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 453 return; 454 } 455 456 assert(pdu->cb_fn != NULL); 457 pdu->cb_fn(pdu->cb_arg); 458 } 459 460 static void 461 pdu_write_fail(struct nvme_tcp_pdu *pdu, int status) 462 { 463 struct nvme_tcp_qpair *tqpair = pdu->qpair; 464 465 /* This function is similar to pdu_write_done(), but it should be called before a PDU is 466 * sent over the socket */ 467 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 468 pdu_write_done(pdu, status); 469 } 470 471 static void 472 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 473 { 474 uint32_t mapped_length = 0; 475 struct nvme_tcp_qpair *tqpair = pdu->qpair; 476 477 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, 478 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 479 &mapped_length); 480 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 481 if (spdk_unlikely(mapped_length < pdu->data_len)) { 482 SPDK_ERRLOG("could not map the whole %u bytes (mapped only %u bytes)\n", pdu->data_len, 483 mapped_length); 484 pdu_write_done(pdu, -EINVAL); 485 return; 486 } 487 pdu->sock_req.cb_fn = pdu_write_done; 488 pdu->sock_req.cb_arg = pdu; 489 tqpair->stats->submitted_requests++; 490 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 491 } 492 493 static void 494 tcp_write_pdu_seq_cb(void *ctx, int status) 495 { 496 struct nvme_tcp_pdu *pdu = ctx; 497 struct nvme_tcp_req *treq = pdu->req; 498 struct nvme_request *req = treq->req; 499 500 assert(treq->ordering.bits.in_progress_accel); 501 treq->ordering.bits.in_progress_accel = 0; 502 503 req->accel_sequence = NULL; 504 if (spdk_unlikely(status != 0)) { 505 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 506 pdu_write_fail(pdu, status); 507 return; 508 } 509 510 _tcp_write_pdu(pdu); 511 } 512 513 static void 514 tcp_write_pdu(struct nvme_tcp_pdu *pdu) 515 { 516 struct nvme_tcp_req *treq = pdu->req; 517 struct nvme_tcp_qpair *tqpair = pdu->qpair; 518 struct nvme_tcp_poll_group *tgroup; 519 struct nvme_request *req; 520 521 if (spdk_likely(treq != NULL)) { 522 req = treq->req; 523 if (req->accel_sequence != NULL && 524 spdk_nvme_opc_get_data_transfer(req->cmd.opc) == SPDK_NVME_DATA_HOST_TO_CONTROLLER && 525 pdu->data_len > 0) { 526 assert(tqpair->qpair.poll_group != NULL); 527 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 528 nvme_tcp_accel_finish_sequence(tgroup, treq, req->accel_sequence, 529 tcp_write_pdu_seq_cb, pdu); 530 return; 531 } 532 } 533 534 _tcp_write_pdu(pdu); 535 } 536 537 static void 538 pdu_accel_compute_crc32_done(void *cb_arg, int status) 539 { 540 struct nvme_tcp_pdu *pdu = cb_arg; 541 struct nvme_tcp_req *req = pdu->req; 542 543 assert(req->ordering.bits.in_progress_accel); 544 req->ordering.bits.in_progress_accel = 0; 545 546 if (spdk_unlikely(status)) { 547 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 548 pdu_write_fail(pdu, status); 549 return; 550 } 551 552 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 553 MAKE_DIGEST_WORD(pdu->data_digest, 
pdu->data_digest_crc32); 554 555 _tcp_write_pdu(pdu); 556 } 557 558 static void 559 pdu_accel_compute_crc32_seq_cb(void *cb_arg, int status) 560 { 561 struct nvme_tcp_pdu *pdu = cb_arg; 562 struct nvme_tcp_qpair *tqpair = pdu->qpair; 563 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 564 struct nvme_tcp_req *treq = pdu->req; 565 struct nvme_request *req = treq->req; 566 567 assert(treq->ordering.bits.in_progress_accel); 568 treq->ordering.bits.in_progress_accel = 0; 569 570 req->accel_sequence = NULL; 571 if (spdk_unlikely(status != 0)) { 572 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 573 pdu_write_fail(pdu, status); 574 return; 575 } 576 577 nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32, 578 pdu->data_iov, pdu->data_iovcnt, 0, 579 pdu_accel_compute_crc32_done, pdu); 580 } 581 582 static void 583 pdu_accel_seq_compute_crc32_done(void *cb_arg) 584 { 585 struct nvme_tcp_pdu *pdu = cb_arg; 586 587 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 588 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 589 } 590 591 static bool 592 pdu_accel_compute_crc32(struct nvme_tcp_pdu *pdu) 593 { 594 struct nvme_tcp_qpair *tqpair = pdu->qpair; 595 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 596 struct nvme_request *req = ((struct nvme_tcp_req *)pdu->req)->req; 597 int rc; 598 599 /* Only support this limited case for the first step */ 600 if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED || 601 pdu->dif_ctx != NULL || 602 pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0)) { 603 return false; 604 } 605 606 if (tqpair->qpair.poll_group == NULL) { 607 return false; 608 } 609 610 if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) { 611 rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence, 612 &pdu->data_digest_crc32, 613 pdu->data_iov, pdu->data_iovcnt, 0, 614 pdu_accel_seq_compute_crc32_done, pdu); 615 if (spdk_unlikely(rc != 0)) { 616 /* If accel is out of resources, fall back to non-accelerated crc32 */ 617 if (rc == -ENOMEM) { 618 return false; 619 } 620 621 SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc); 622 pdu_write_fail(pdu, rc); 623 return true; 624 } 625 626 tcp_write_pdu(pdu); 627 return true; 628 } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) { 629 if (req->accel_sequence != NULL) { 630 nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence, 631 pdu_accel_compute_crc32_seq_cb, pdu); 632 } else { 633 nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32, 634 pdu->data_iov, pdu->data_iovcnt, 0, 635 pdu_accel_compute_crc32_done, pdu); 636 } 637 638 return true; 639 } 640 641 return false; 642 } 643 644 static void 645 pdu_compute_crc32_seq_cb(void *cb_arg, int status) 646 { 647 struct nvme_tcp_pdu *pdu = cb_arg; 648 struct nvme_tcp_req *treq = pdu->req; 649 struct nvme_request *req = treq->req; 650 uint32_t crc32c; 651 652 assert(treq->ordering.bits.in_progress_accel); 653 treq->ordering.bits.in_progress_accel = 0; 654 655 req->accel_sequence = NULL; 656 if (spdk_unlikely(status != 0)) { 657 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 658 pdu_write_fail(pdu, status); 659 return; 660 } 661 662 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 663 crc32c = crc32c ^ SPDK_CRC32C_XOR; 664 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 665 666 _tcp_write_pdu(pdu); 667 } 668 669 static void 670 pdu_compute_crc32(struct nvme_tcp_pdu *pdu) 671 { 672 
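/* Illustrative summary (added commentary) of the data-digest strategy below: try to offload the
 * CRC to the accel framework first (pdu_accel_compute_crc32()); if that is not possible, finish
 * any accel sequence attached to the request and then fall back to a software CRC32C, roughly:
 *
 *   crc32c = nvme_tcp_pdu_calc_data_digest(pdu) ^ SPDK_CRC32C_XOR;
 *   MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
 *
 * In every case the PDU ends up in tcp_write_pdu()/_tcp_write_pdu(), or in pdu_write_fail() on
 * error. */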
struct nvme_tcp_qpair *tqpair = pdu->qpair; 673 struct nvme_tcp_poll_group *tgroup; 674 struct nvme_request *req; 675 uint32_t crc32c; 676 677 /* Data Digest */ 678 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 679 tqpair->flags.host_ddgst_enable) { 680 if (pdu_accel_compute_crc32(pdu)) { 681 return; 682 } 683 684 req = ((struct nvme_tcp_req *)pdu->req)->req; 685 if (req->accel_sequence != NULL) { 686 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 687 nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence, 688 pdu_compute_crc32_seq_cb, pdu); 689 return; 690 } 691 692 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 693 crc32c = crc32c ^ SPDK_CRC32C_XOR; 694 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 695 } 696 697 tcp_write_pdu(pdu); 698 } 699 700 static int 701 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 702 struct nvme_tcp_pdu *pdu, 703 nvme_tcp_qpair_xfer_complete_cb cb_fn, 704 void *cb_arg) 705 { 706 int hlen; 707 uint32_t crc32c; 708 709 hlen = pdu->hdr.common.hlen; 710 pdu->cb_fn = cb_fn; 711 pdu->cb_arg = cb_arg; 712 pdu->qpair = tqpair; 713 714 /* Header Digest */ 715 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 716 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 717 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 718 } 719 720 pdu_compute_crc32(pdu); 721 722 return 0; 723 } 724 725 /* 726 * Build SGL describing contiguous payload buffer. 727 */ 728 static int 729 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 730 { 731 struct nvme_request *req = tcp_req->req; 732 733 /* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL, 734 * so just double cast it to make it go away */ 735 tcp_req->iov[0].iov_base = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset); 736 tcp_req->iov[0].iov_len = req->payload_size; 737 tcp_req->iovcnt = 1; 738 739 SPDK_DEBUGLOG(nvme, "enter\n"); 740 741 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 742 743 return 0; 744 } 745 746 /* 747 * Build SGL describing scattered payload buffer. 748 */ 749 static int 750 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 751 { 752 int rc; 753 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 754 struct nvme_request *req = tcp_req->req; 755 756 SPDK_DEBUGLOG(nvme, "enter\n"); 757 758 assert(req->payload_size != 0); 759 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 760 assert(req->payload.reset_sgl_fn != NULL); 761 assert(req->payload.next_sge_fn != NULL); 762 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 763 764 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 765 remaining_size = req->payload_size; 766 767 do { 768 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 769 &length); 770 if (rc) { 771 return -1; 772 } 773 774 length = spdk_min(length, remaining_size); 775 tcp_req->iov[iovcnt].iov_len = length; 776 remaining_size -= length; 777 iovcnt++; 778 } while (remaining_size > 0 && iovcnt < max_num_sgl); 779 780 781 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. 
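 * If the payload needs more than NVME_TCP_MAX_SGL_DESCRIPTORS iovecs, or a next_sge_fn callback
 * under-reports a segment, remaining_size stays non-zero and the request is failed here instead
 * of being sent short.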
*/ 782 if (remaining_size > 0) { 783 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 784 tcp_req, iovcnt, remaining_size); 785 return -1; 786 } 787 788 tcp_req->iovcnt = iovcnt; 789 790 return 0; 791 } 792 793 static int 794 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 795 struct nvme_tcp_req *tcp_req) 796 { 797 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 798 int rc = 0; 799 enum spdk_nvme_data_transfer xfer; 800 uint32_t max_in_capsule_data_size; 801 802 tcp_req->req = req; 803 req->cmd.cid = tcp_req->cid; 804 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 805 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 806 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 807 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 808 809 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 810 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 811 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 812 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 813 } else { 814 rc = -1; 815 } 816 817 if (rc) { 818 return rc; 819 } 820 821 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 822 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 823 824 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 825 } else { 826 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 827 } 828 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 829 max_in_capsule_data_size = ctrlr->ioccsz_bytes; 830 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 831 max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 832 } 833 834 if (req->payload_size <= max_in_capsule_data_size) { 835 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 836 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 837 req->cmd.dptr.sgl1.address = 0; 838 tcp_req->in_capsule_data = true; 839 } 840 } 841 842 return 0; 843 } 844 845 static inline bool 846 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 847 { 848 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 849 return false; 850 } 851 852 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 853 assert(tcp_req->tqpair != NULL); 854 assert(tcp_req->req != NULL); 855 856 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 857 858 if (!tcp_req->tqpair->qpair.in_completion_context) { 859 tcp_req->tqpair->async_complete++; 860 } 861 862 nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true); 863 return true; 864 } 865 866 static void 867 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 868 { 869 struct nvme_tcp_req *tcp_req = cb_arg; 870 871 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 872 tcp_req->tqpair->qpair.id); 873 tcp_req->ordering.bits.send_ack = 1; 874 /* Handle the r2t case */ 875 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 876 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 877 nvme_tcp_send_h2c_data(tcp_req); 878 } else { 879 nvme_tcp_req_complete_safe(tcp_req); 880 } 881 } 882 883 static int 884 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 885 struct nvme_tcp_req *tcp_req) 886 { 887 struct nvme_tcp_pdu *pdu; 888 struct spdk_nvme_tcp_cmd *capsule_cmd; 889 uint32_t plen = 0, alignment; 890 uint8_t pdo; 891 892 SPDK_DEBUGLOG(nvme, "enter\n"); 893 pdu = tcp_req->pdu; 894 
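/* Rough layout (added commentary) of the capsule command PDU assembled below; each piece is
 * appended only when the corresponding feature is active:
 *
 *   CH+PSH (sizeof(*capsule_cmd)) | HDGST? | padding to (cpda+1)*4? | in-capsule data? | DDGST?
 *
 * plen accumulates each piece and pdo points at the start of the in-capsule data. */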
pdu->req = tcp_req; 895 896 capsule_cmd = &pdu->hdr.capsule_cmd; 897 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 898 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 899 capsule_cmd->ccsqe = tcp_req->req->cmd; 900 901 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 902 903 if (tqpair->flags.host_hdgst_enable) { 904 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 905 tcp_req); 906 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 907 plen += SPDK_NVME_TCP_DIGEST_LEN; 908 } 909 910 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 911 goto end; 912 } 913 914 pdo = plen; 915 pdu->padding_len = 0; 916 if (tqpair->cpda) { 917 alignment = (tqpair->cpda + 1) << 2; 918 if (alignment > plen) { 919 pdu->padding_len = alignment - plen; 920 pdo = alignment; 921 plen = alignment; 922 } 923 } 924 925 capsule_cmd->common.pdo = pdo; 926 plen += tcp_req->req->payload_size; 927 if (tqpair->flags.host_ddgst_enable) { 928 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 929 plen += SPDK_NVME_TCP_DIGEST_LEN; 930 } 931 932 tcp_req->datao = 0; 933 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 934 0, tcp_req->req->payload_size); 935 end: 936 capsule_cmd->common.plen = plen; 937 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 938 939 } 940 941 static int 942 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 943 struct nvme_request *req) 944 { 945 struct nvme_tcp_qpair *tqpair; 946 struct nvme_tcp_req *tcp_req; 947 948 tqpair = nvme_tcp_qpair(qpair); 949 assert(tqpair != NULL); 950 assert(req != NULL); 951 952 tcp_req = nvme_tcp_req_get(tqpair); 953 if (!tcp_req) { 954 tqpair->stats->queued_requests++; 955 /* Inform the upper layer to try again later. 
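 * (In the generic qpair layer, -EAGAIN is expected to land the request on qpair->queued_req;
 * queued entries are then resubmitted on a later poll, which is what the needs_poll handling in
 * pdu_write_done() helps guarantee.)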
*/ 956 return -EAGAIN; 957 } 958 959 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 960 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 961 nvme_tcp_req_put(tqpair, tcp_req); 962 return -1; 963 } 964 965 spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg, 966 (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc, 967 req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12); 968 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 969 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 970 } 971 972 static int 973 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 974 { 975 return 0; 976 } 977 978 static void 979 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 980 struct nvme_tcp_qpair *tqpair, 981 struct spdk_nvme_cpl *rsp, 982 bool print_on_error) 983 { 984 struct spdk_nvme_cpl cpl; 985 struct spdk_nvme_qpair *qpair; 986 struct nvme_request *req; 987 bool error, print_error; 988 989 assert(tcp_req->req != NULL); 990 req = tcp_req->req; 991 qpair = req->qpair; 992 993 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 994 memcpy(&cpl, rsp, sizeof(cpl)); 995 996 error = spdk_nvme_cpl_is_error(rsp); 997 print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 998 999 if (print_error) { 1000 spdk_nvme_qpair_print_command(qpair, &req->cmd); 1001 } 1002 1003 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 1004 spdk_nvme_qpair_print_completion(qpair, rsp); 1005 } 1006 1007 spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg, 1008 (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw); 1009 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 1010 nvme_tcp_req_put(tqpair, tcp_req); 1011 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl); 1012 } 1013 1014 static void 1015 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 1016 { 1017 struct nvme_tcp_req *tcp_req, *tmp; 1018 struct spdk_nvme_cpl cpl = {}; 1019 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1020 1021 cpl.sqid = qpair->id; 1022 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1023 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1024 cpl.status.dnr = dnr; 1025 1026 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1027 /* We cannot abort requests with accel operations in progress */ 1028 if (tcp_req->ordering.bits.in_progress_accel) { 1029 continue; 1030 } 1031 1032 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true); 1033 } 1034 } 1035 1036 static void 1037 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 1038 { 1039 struct nvme_tcp_qpair *tqpair = cb_arg; 1040 1041 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1042 } 1043 1044 static void 1045 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1046 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1047 { 1048 struct nvme_tcp_pdu *rsp_pdu; 1049 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 1050 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 1051 uint8_t copy_len; 1052 1053 rsp_pdu = tqpair->send_pdu; 1054 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1055 h2c_term_req = &rsp_pdu->hdr.term_req; 1056 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 1057 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 1058 1059 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1060 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1061 DSET32(&h2c_term_req->fei, 
error_offset); 1062 } 1063 1064 copy_len = pdu->hdr.common.hlen; 1065 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 1066 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 1067 } 1068 1069 /* Copy the error info into the buffer */ 1070 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 1071 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 1072 1073 /* Contain the header len of the wrong received pdu */ 1074 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 1075 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1076 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair); 1077 } 1078 1079 static bool 1080 nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair) 1081 { 1082 switch (tqpair->state) { 1083 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 1084 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 1085 case NVME_TCP_QPAIR_STATE_RUNNING: 1086 return true; 1087 default: 1088 return false; 1089 } 1090 } 1091 1092 static void 1093 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 1094 { 1095 struct nvme_tcp_pdu *pdu; 1096 uint32_t error_offset = 0; 1097 enum spdk_nvme_tcp_term_req_fes fes; 1098 uint32_t expected_hlen, hd_len = 0; 1099 bool plen_error = false; 1100 1101 pdu = tqpair->recv_pdu; 1102 1103 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 1104 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 1105 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 1106 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 1107 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1108 goto err; 1109 } 1110 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 1111 if (pdu->hdr.common.plen != expected_hlen) { 1112 plen_error = true; 1113 } 1114 } else { 1115 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) { 1116 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n"); 1117 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1118 goto err; 1119 } 1120 1121 switch (pdu->hdr.common.pdu_type) { 1122 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1123 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 1124 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 1125 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 1126 } 1127 1128 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 1129 plen_error = true; 1130 } 1131 break; 1132 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1133 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1134 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 1135 plen_error = true; 1136 } 1137 break; 1138 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1139 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 1140 if ((pdu->hdr.common.plen <= expected_hlen) || 1141 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 1142 plen_error = true; 1143 } 1144 break; 1145 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1146 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 1147 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 1148 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 1149 } 1150 1151 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 1152 plen_error = true; 1153 } 1154 break; 1155 1156 default: 1157 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 1158 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1159 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 
1160 goto err; 1161 } 1162 } 1163 1164 if (pdu->hdr.common.hlen != expected_hlen) { 1165 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 1166 expected_hlen, pdu->hdr.common.hlen); 1167 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1168 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 1169 goto err; 1170 1171 } else if (plen_error) { 1172 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1173 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 1174 goto err; 1175 } else { 1176 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1177 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 1178 return; 1179 } 1180 err: 1181 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1182 } 1183 1184 static struct nvme_tcp_req * 1185 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 1186 { 1187 assert(tqpair != NULL); 1188 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 1189 return NULL; 1190 } 1191 1192 return &tqpair->tcp_reqs[cid]; 1193 } 1194 1195 static void 1196 nvme_tcp_recv_payload_seq_cb(void *cb_arg, int status) 1197 { 1198 struct nvme_tcp_req *treq = cb_arg; 1199 struct nvme_request *req = treq->req; 1200 struct nvme_tcp_qpair *tqpair = treq->tqpair; 1201 struct nvme_tcp_poll_group *group; 1202 1203 assert(treq->ordering.bits.in_progress_accel); 1204 treq->ordering.bits.in_progress_accel = 0; 1205 1206 /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see 1207 * comment in pdu_write_done(). */ 1208 if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) { 1209 group = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1210 TAILQ_INSERT_TAIL(&group->needs_poll, tqpair, link); 1211 tqpair->needs_poll = true; 1212 } 1213 1214 req->accel_sequence = NULL; 1215 if (spdk_unlikely(status != 0)) { 1216 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 1217 treq->rsp.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1218 } 1219 1220 nvme_tcp_req_complete_safe(treq); 1221 } 1222 1223 static void 1224 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 1225 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 1226 { 1227 struct nvme_tcp_req *tcp_req; 1228 struct nvme_tcp_poll_group *tgroup; 1229 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 1230 uint8_t flags; 1231 1232 tcp_req = pdu->req; 1233 assert(tcp_req != NULL); 1234 1235 SPDK_DEBUGLOG(nvme, "enter\n"); 1236 c2h_data = &pdu->hdr.c2h_data; 1237 tcp_req->datao += pdu->data_len; 1238 flags = c2h_data->common.flags; 1239 1240 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) { 1241 if (tcp_req->datao == tcp_req->req->payload_size) { 1242 tcp_req->rsp.status.p = 0; 1243 } else { 1244 tcp_req->rsp.status.p = 1; 1245 } 1246 1247 tcp_req->rsp.cid = tcp_req->cid; 1248 tcp_req->rsp.sqid = tqpair->qpair.id; 1249 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 1250 tcp_req->ordering.bits.data_recv = 1; 1251 if (tcp_req->req->accel_sequence != NULL) { 1252 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1253 nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence); 1254 nvme_tcp_accel_finish_sequence(tgroup, tcp_req, 1255 tcp_req->req->accel_sequence, 1256 nvme_tcp_recv_payload_seq_cb, 1257 tcp_req); 1258 return; 1259 } 1260 1261 if (nvme_tcp_req_complete_safe(tcp_req)) { 1262 (*reaped)++; 1263 } 1264 } 1265 } 1266 } 1267 1268 static const char 
*spdk_nvme_tcp_term_req_fes_str[] = { 1269 "Invalid PDU Header Field", 1270 "PDU Sequence Error", 1271 "Header Digest Error", 1272 "Data Transfer Out of Range", 1273 "Data Transfer Limit Exceeded", 1274 "Unsupported parameter", 1275 }; 1276 1277 static void 1278 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 1279 { 1280 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 1281 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 1282 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1283 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1284 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 1285 DGET32(c2h_term_req->fei)); 1286 } 1287 /* we may also need to dump some other info here */ 1288 } 1289 1290 static void 1291 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 1292 struct nvme_tcp_pdu *pdu) 1293 { 1294 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 1295 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1296 } 1297 1298 static void 1299 _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1300 { 1301 struct nvme_tcp_pdu *pdu; 1302 1303 assert(tqpair != NULL); 1304 pdu = tqpair->recv_pdu; 1305 1306 switch (pdu->hdr.common.pdu_type) { 1307 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1308 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 1309 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1310 break; 1311 1312 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1313 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 1314 break; 1315 1316 default: 1317 /* This code path should never be reached */ 1318 SPDK_ERRLOG("Unexpected PDU type 0x%02x during payload handling\n", pdu->hdr.common.pdu_type); 1319 break; 1320 } 1321 } 1322 1323 static void 1324 nvme_tcp_accel_recv_compute_crc32_done(void *cb_arg, int status) 1325 { 1326 struct nvme_tcp_req *tcp_req = cb_arg; 1327 struct nvme_tcp_pdu *pdu; 1328 struct nvme_tcp_qpair *tqpair; 1329 int rc; 1330 struct nvme_tcp_poll_group *pgroup; 1331 int dummy_reaped = 0; 1332 1333 pdu = tcp_req->pdu; 1334 assert(pdu != NULL); 1335 1336 tqpair = tcp_req->tqpair; 1337 assert(tqpair != NULL); 1338 1339 assert(tcp_req->ordering.bits.in_progress_accel); 1340 tcp_req->ordering.bits.in_progress_accel = 0; 1341 1342 /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see 1343 * comment in pdu_write_done().
*/ 1344 if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) { 1345 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1346 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 1347 tqpair->needs_poll = true; 1348 } 1349 1350 if (spdk_unlikely(status)) { 1351 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1352 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1353 goto end; 1354 } 1355 1356 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1357 rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1358 if (rc == 0) { 1359 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1360 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1361 } 1362 1363 end: 1364 nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped); 1365 } 1366 1367 static void 1368 nvme_tcp_req_copy_pdu(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu) 1369 { 1370 treq->pdu->hdr = pdu->hdr; 1371 treq->pdu->req = treq; 1372 memcpy(treq->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest)); 1373 memcpy(treq->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt); 1374 treq->pdu->data_iovcnt = pdu->data_iovcnt; 1375 treq->pdu->data_len = pdu->data_len; 1376 } 1377 1378 static void 1379 nvme_tcp_accel_seq_recv_compute_crc32_done(void *cb_arg) 1380 { 1381 struct nvme_tcp_req *treq = cb_arg; 1382 struct nvme_tcp_qpair *tqpair = treq->tqpair; 1383 struct nvme_tcp_pdu *pdu = treq->pdu; 1384 bool result; 1385 1386 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1387 result = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1388 if (spdk_unlikely(!result)) { 1389 SPDK_ERRLOG("data digest error on tqpair=(%p)\n", tqpair); 1390 treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1391 } 1392 } 1393 1394 static bool 1395 nvme_tcp_accel_recv_compute_crc32(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu) 1396 { 1397 struct nvme_tcp_qpair *tqpair = treq->tqpair; 1398 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1399 struct nvme_request *req = treq->req; 1400 int rc, dummy = 0; 1401 1402 /* Only support this limited case that the request has only one c2h pdu */ 1403 if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED || 1404 tqpair->qpair.poll_group == NULL || pdu->dif_ctx != NULL || 1405 pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0 || 1406 pdu->data_len != req->payload_size)) { 1407 return false; 1408 } 1409 1410 if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) { 1411 nvme_tcp_req_copy_pdu(treq, pdu); 1412 rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence, 1413 &treq->pdu->data_digest_crc32, 1414 treq->pdu->data_iov, treq->pdu->data_iovcnt, 0, 1415 nvme_tcp_accel_seq_recv_compute_crc32_done, treq); 1416 if (spdk_unlikely(rc != 0)) { 1417 /* If accel is out of resources, fall back to non-accelerated crc32 */ 1418 if (rc == -ENOMEM) { 1419 return false; 1420 } 1421 1422 SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc); 1423 treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1424 } 1425 1426 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1427 nvme_tcp_c2h_data_payload_handle(tqpair, treq->pdu, &dummy); 1428 return true; 1429 } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) { 1430 nvme_tcp_req_copy_pdu(treq, pdu); 1431 
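/* The qpair's recv_pdu is reused for the next incoming PDU as soon as the receive state is
 * reset below, so everything the asynchronous CRC needs (header, data iovs, received digest)
 * has just been snapshotted into the request's own PDU by nvme_tcp_req_copy_pdu(). */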
nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1432 nvme_tcp_accel_submit_crc32c(tgroup, treq, &treq->pdu->data_digest_crc32, 1433 treq->pdu->data_iov, treq->pdu->data_iovcnt, 0, 1434 nvme_tcp_accel_recv_compute_crc32_done, treq); 1435 return true; 1436 } 1437 1438 return false; 1439 } 1440 1441 static void 1442 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 1443 uint32_t *reaped) 1444 { 1445 int rc = 0; 1446 struct nvme_tcp_pdu *pdu = tqpair->recv_pdu; 1447 uint32_t crc32c; 1448 struct nvme_tcp_req *tcp_req = pdu->req; 1449 1450 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1451 SPDK_DEBUGLOG(nvme, "enter\n"); 1452 1453 /* The request can be NULL, e.g. in case of C2HTermReq */ 1454 if (spdk_likely(tcp_req != NULL)) { 1455 tcp_req->expected_datao += pdu->data_len; 1456 } 1457 1458 /* check data digest if need */ 1459 if (pdu->ddgst_enable) { 1460 /* But if the data digest is enabled, tcp_req cannot be NULL */ 1461 assert(tcp_req != NULL); 1462 if (nvme_tcp_accel_recv_compute_crc32(tcp_req, pdu)) { 1463 return; 1464 } 1465 1466 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1467 crc32c = crc32c ^ SPDK_CRC32C_XOR; 1468 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1469 if (rc == 0) { 1470 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1471 tcp_req = pdu->req; 1472 assert(tcp_req != NULL); 1473 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1474 } 1475 } 1476 1477 _nvme_tcp_pdu_payload_handle(tqpair, reaped); 1478 } 1479 1480 static void 1481 nvme_tcp_send_icreq_complete(void *cb_arg) 1482 { 1483 struct nvme_tcp_qpair *tqpair = cb_arg; 1484 1485 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id); 1486 1487 tqpair->flags.icreq_send_ack = true; 1488 1489 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 1490 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id); 1491 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1492 } 1493 } 1494 1495 static void 1496 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1497 struct nvme_tcp_pdu *pdu) 1498 { 1499 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1500 uint32_t error_offset = 0; 1501 enum spdk_nvme_tcp_term_req_fes fes; 1502 int recv_buf_size; 1503 1504 /* Only PFV 0 is defined currently */ 1505 if (ic_resp->pfv != 0) { 1506 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1507 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1508 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1509 goto end; 1510 } 1511 1512 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1513 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1514 ic_resp->maxh2cdata); 1515 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1516 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1517 goto end; 1518 } 1519 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1520 1521 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1522 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1523 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1524 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1525 goto end; 1526 } 1527 tqpair->cpda = ic_resp->cpda; 1528 1529 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1530 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? 
true : false; 1531 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1532 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1533 1534 /* Now that we know whether digests are enabled, properly size the receive buffer to 1535 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1536 * parameter. */ 1537 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1538 1539 if (tqpair->flags.host_hdgst_enable) { 1540 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1541 } 1542 1543 if (tqpair->flags.host_ddgst_enable) { 1544 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1545 } 1546 1547 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1548 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1549 tqpair, 1550 recv_buf_size); 1551 /* Not fatal. */ 1552 } 1553 1554 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1555 1556 if (!tqpair->flags.icreq_send_ack) { 1557 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1558 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1559 return; 1560 } 1561 1562 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1563 return; 1564 end: 1565 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1566 } 1567 1568 static void 1569 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1570 uint32_t *reaped) 1571 { 1572 struct nvme_tcp_req *tcp_req; 1573 struct nvme_tcp_poll_group *tgroup; 1574 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1575 uint32_t cid, error_offset = 0; 1576 enum spdk_nvme_tcp_term_req_fes fes; 1577 1578 SPDK_DEBUGLOG(nvme, "enter\n"); 1579 cid = capsule_resp->rccqe.cid; 1580 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1581 1582 if (!tcp_req) { 1583 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1584 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1585 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1586 goto end; 1587 } 1588 1589 assert(tcp_req->req != NULL); 1590 1591 tcp_req->rsp = capsule_resp->rccqe; 1592 tcp_req->ordering.bits.data_recv = 1; 1593 1594 /* Recv the pdu again */ 1595 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1596 1597 if (tcp_req->req->accel_sequence != NULL) { 1598 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1599 nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence); 1600 nvme_tcp_accel_finish_sequence(tgroup, tcp_req, tcp_req->req->accel_sequence, 1601 nvme_tcp_recv_payload_seq_cb, tcp_req); 1602 return; 1603 } 1604 1605 if (nvme_tcp_req_complete_safe(tcp_req)) { 1606 (*reaped)++; 1607 } 1608 1609 return; 1610 1611 end: 1612 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1613 } 1614 1615 static void 1616 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1617 struct nvme_tcp_pdu *pdu) 1618 { 1619 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1620 uint32_t error_offset = 0; 1621 enum spdk_nvme_tcp_term_req_fes fes; 1622 1623 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1624 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1625 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1626 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 
1627 goto end; 1628 } 1629 1630 /* set the data buffer */ 1631 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1632 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1633 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1634 return; 1635 end: 1636 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1637 } 1638 1639 static void 1640 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1641 { 1642 struct nvme_tcp_req *tcp_req; 1643 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1644 uint32_t error_offset = 0; 1645 enum spdk_nvme_tcp_term_req_fes fes; 1646 int flags = c2h_data->common.flags; 1647 1648 SPDK_DEBUGLOG(nvme, "enter\n"); 1649 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1650 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1651 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1652 if (!tcp_req) { 1653 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1654 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1655 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1656 goto end; 1657 1658 } 1659 1660 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n", 1661 tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size); 1662 1663 if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) && 1664 !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) { 1665 SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data); 1666 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1667 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common); 1668 goto end; 1669 } 1670 1671 if (c2h_data->datal > tcp_req->req->payload_size) { 1672 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1673 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1674 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1675 goto end; 1676 } 1677 1678 if (tcp_req->expected_datao != c2h_data->datao) { 1679 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n", 1680 tcp_req, c2h_data->datao, tcp_req->expected_datao); 1681 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1682 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1683 goto end; 1684 } 1685 1686 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1687 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1688 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1689 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1690 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1691 goto end; 1692 1693 } 1694 1695 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1696 c2h_data->datao, c2h_data->datal); 1697 pdu->req = tcp_req; 1698 1699 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1700 return; 1701 1702 end: 1703 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1704 } 1705 1706 static void 1707 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1708 { 1709 struct nvme_tcp_req *tcp_req = cb_arg; 1710 1711 assert(tcp_req != NULL); 1712 1713 tcp_req->ordering.bits.send_ack = 1; 1714 if (tcp_req->r2tl_remain) { 1715 nvme_tcp_send_h2c_data(tcp_req); 1716 } else { 1717 
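/* All data for the current R2T has been sent: retire this R2T, then either service an R2T that
 * arrived while the H2C transfer was still in flight (r2t_waiting_h2c_complete) or try to
 * complete the request. */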
assert(tcp_req->active_r2ts > 0); 1718 tcp_req->active_r2ts--; 1719 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1720 1721 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1722 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1723 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1724 assert(tcp_req->active_r2ts > 0); 1725 tcp_req->ttag = tcp_req->ttag_r2t_next; 1726 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1727 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1728 nvme_tcp_send_h2c_data(tcp_req); 1729 return; 1730 } 1731 1732 /* Need also call this function to free the resource */ 1733 nvme_tcp_req_complete_safe(tcp_req); 1734 } 1735 } 1736 1737 static void 1738 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1739 { 1740 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1741 struct nvme_tcp_pdu *rsp_pdu; 1742 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1743 uint32_t plen, pdo, alignment; 1744 1745 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1746 tcp_req->ordering.bits.send_ack = 0; 1747 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1748 rsp_pdu = tcp_req->pdu; 1749 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1750 rsp_pdu->req = tcp_req; 1751 h2c_data = &rsp_pdu->hdr.h2c_data; 1752 1753 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1754 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1755 h2c_data->cccid = tcp_req->cid; 1756 h2c_data->ttag = tcp_req->ttag; 1757 h2c_data->datao = tcp_req->datao; 1758 1759 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1760 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1761 h2c_data->datao, h2c_data->datal); 1762 tcp_req->r2tl_remain -= h2c_data->datal; 1763 1764 if (tqpair->flags.host_hdgst_enable) { 1765 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1766 plen += SPDK_NVME_TCP_DIGEST_LEN; 1767 } 1768 1769 rsp_pdu->padding_len = 0; 1770 pdo = plen; 1771 if (tqpair->cpda) { 1772 alignment = (tqpair->cpda + 1) << 2; 1773 if (alignment > plen) { 1774 rsp_pdu->padding_len = alignment - plen; 1775 pdo = plen = alignment; 1776 } 1777 } 1778 1779 h2c_data->common.pdo = pdo; 1780 plen += h2c_data->datal; 1781 if (tqpair->flags.host_ddgst_enable) { 1782 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1783 plen += SPDK_NVME_TCP_DIGEST_LEN; 1784 } 1785 1786 h2c_data->common.plen = plen; 1787 tcp_req->datao += h2c_data->datal; 1788 if (!tcp_req->r2tl_remain) { 1789 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1790 } 1791 1792 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1793 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1794 1795 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1796 } 1797 1798 static void 1799 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1800 { 1801 struct nvme_tcp_req *tcp_req; 1802 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1803 uint32_t cid, error_offset = 0; 1804 enum spdk_nvme_tcp_term_req_fes fes; 1805 1806 SPDK_DEBUGLOG(nvme, "enter\n"); 1807 cid = r2t->cccid; 1808 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1809 if (!tcp_req) { 1810 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1811 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1812 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1813 goto end; 1814 } 1815 1816 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1817 tqpair); 
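/* Illustrative example (not part of the driver, values are hypothetical):
 * for a 16 KiB write with tqpair->maxh2cdata = 8192 and a single R2T that
 * covers the whole buffer (r2to = 0, r2tl = 16384), this handler arms the
 * request and nvme_tcp_send_h2c_data() emits two H2C DATA PDUs, because each
 * transfer is capped at spdk_min(r2tl_remain, maxh2cdata):
 *
 *   1st PDU: datao = 0,    datal = 8192   (r2tl_remain 16384 -> 8192)
 *   2nd PDU: datao = 8192, datal = 8192   (r2tl_remain 8192 -> 0, LAST_PDU set)
 *
 * The second PDU is sent from nvme_tcp_qpair_h2c_data_send_complete() once
 * the first one has been acknowledged by the kernel.
 */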
1818
1819 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
1820 assert(tcp_req->active_r2ts == 0);
1821 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
1822 }
1823
1824 if (tcp_req->datao != r2t->r2to) {
1825 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1826 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
1827 goto end;
1828
1829 }
1830
1831 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
1832 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
1833 tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
1834 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1835 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
1836 goto end;
1837 }
1838
1839 tcp_req->active_r2ts++;
1840 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
1841 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
1842 /* We received a subsequent R2T while we are still waiting for the H2C transfer to complete */
1843 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
1844 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
1845 tcp_req->ttag_r2t_next = r2t->ttag;
1846 tcp_req->r2tl_remain_next = r2t->r2tl;
1847 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
1848 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1849 return;
1850 } else {
1851 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1852 SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t,
1853 tqpair);
1854 goto end;
1855 }
1856 }
1857
1858 tcp_req->ttag = r2t->ttag;
1859 tcp_req->r2tl_remain = r2t->r2tl;
1860 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1861
1862 if (spdk_likely(tcp_req->ordering.bits.send_ack)) {
1863 nvme_tcp_send_h2c_data(tcp_req);
1864 } else {
1865 tcp_req->ordering.bits.h2c_send_waiting_ack = 1;
1866 }
1867
1868 return;
1869
1870 end:
1871 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1872
1873 }
1874
1875 static void
1876 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
1877 {
1878 struct nvme_tcp_pdu *pdu;
1879 int rc;
1880 uint32_t crc32c, error_offset = 0;
1881 enum spdk_nvme_tcp_term_req_fes fes;
1882
1883 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1884 pdu = tqpair->recv_pdu;
1885
1886 SPDK_DEBUGLOG(nvme, "enter: pdu type=%u\n", pdu->hdr.common.pdu_type);
1887 /* check header digest if needed */
1888 if (pdu->has_hdgst) {
1889 crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1890 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1891 if (rc == 0) {
1892 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1893 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1894 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1895 return;
1896
1897 }
1898 }
1899
1900 switch (pdu->hdr.common.pdu_type) {
1901 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
1902 nvme_tcp_icresp_handle(tqpair, pdu);
1903 break;
1904 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
1905 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
1906 break;
1907 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1908 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
1909 break;
1910
1911 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1912 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
1913 break;
1914 case SPDK_NVME_TCP_PDU_TYPE_R2T:
1915 nvme_tcp_r2t_hdr_handle(tqpair, pdu);
1916 break;
1917
1918 default:
1919 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
1920 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1921 error_offset = 1;
1922 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1923 break;
1924 }
1925
1926 }
1927
1928 static int
1929 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions)
1930 {
1931 int rc = 0;
1932 struct nvme_tcp_pdu *pdu;
1933 uint32_t data_len;
1934 enum nvme_tcp_pdu_recv_state prev_state;
1935
1936 *reaped = tqpair->async_complete;
1937 tqpair->async_complete = 0;
1938
1939 /* The loop here is to allow for several back-to-back state changes. */
1940 do {
1941 if (*reaped >= max_completions) {
1942 break;
1943 }
1944
1945 prev_state = tqpair->recv_state;
1946 pdu = tqpair->recv_pdu;
1947 switch (tqpair->recv_state) {
1948 /* If in a new state */
1949 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1950 memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
1951 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1952 break;
1953 /* Wait for the pdu common header */
1954 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1955 assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
1956 rc = nvme_tcp_read_data(tqpair->sock,
1957 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
1958 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
1959 if (rc < 0) {
1960 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1961 break;
1962 }
1963 pdu->ch_valid_bytes += rc;
1964 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
1965 return NVME_TCP_PDU_IN_PROGRESS;
1966 }
1967
1968 /* The common header of this PDU has now been read from the socket. */
1969 nvme_tcp_pdu_ch_handle(tqpair);
1970 break;
1971 /* Wait for the pdu specific header */
1972 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1973 assert(pdu->psh_valid_bytes < pdu->psh_len);
1974 rc = nvme_tcp_read_data(tqpair->sock,
1975 pdu->psh_len - pdu->psh_valid_bytes,
1976 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
1977 if (rc < 0) {
1978 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1979 break;
1980 }
1981
1982 pdu->psh_valid_bytes += rc;
1983 if (pdu->psh_valid_bytes < pdu->psh_len) {
1984 return NVME_TCP_PDU_IN_PROGRESS;
1985 }
1986
1987 /* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
1988 nvme_tcp_pdu_psh_handle(tqpair, reaped);
1989 break;
1990 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1991 /* check whether the data is valid; if not, just return */
1992 if (!pdu->data_len) {
1993 return NVME_TCP_PDU_IN_PROGRESS;
1994 }
1995
1996 data_len = pdu->data_len;
1997 /* data digest */
1998 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
1999 tqpair->flags.host_ddgst_enable)) {
2000 data_len += SPDK_NVME_TCP_DIGEST_LEN;
2001 pdu->ddgst_enable = true;
2002 }
2003
2004 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2005 if (rc < 0) {
2006 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
2007 break;
2008 }
2009
2010 pdu->rw_offset += rc;
2011 if (pdu->rw_offset < data_len) {
2012 return NVME_TCP_PDU_IN_PROGRESS;
2013 }
2014
2015 assert(pdu->rw_offset == data_len);
2016 /* All of this PDU has now been read from the socket. 
*/ 2017 nvme_tcp_pdu_payload_handle(tqpair, reaped); 2018 break; 2019 case NVME_TCP_PDU_RECV_STATE_QUIESCING: 2020 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) { 2021 if (nvme_qpair_get_state(&tqpair->qpair) == NVME_QPAIR_DISCONNECTING) { 2022 nvme_transport_ctrlr_disconnect_qpair_done(&tqpair->qpair); 2023 } 2024 2025 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 2026 } 2027 break; 2028 case NVME_TCP_PDU_RECV_STATE_ERROR: 2029 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 2030 return NVME_TCP_PDU_FATAL; 2031 default: 2032 assert(0); 2033 break; 2034 } 2035 } while (prev_state != tqpair->recv_state); 2036 2037 return rc > 0 ? 0 : rc; 2038 } 2039 2040 static void 2041 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 2042 { 2043 uint64_t t02; 2044 struct nvme_tcp_req *tcp_req, *tmp; 2045 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2046 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 2047 struct spdk_nvme_ctrlr_process *active_proc; 2048 2049 /* Don't check timeouts during controller initialization. */ 2050 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 2051 return; 2052 } 2053 2054 if (nvme_qpair_is_admin_queue(qpair)) { 2055 active_proc = nvme_ctrlr_get_current_process(ctrlr); 2056 } else { 2057 active_proc = qpair->active_proc; 2058 } 2059 2060 /* Only check timeouts if the current process has a timeout callback. */ 2061 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 2062 return; 2063 } 2064 2065 t02 = spdk_get_ticks(); 2066 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2067 if (ctrlr->is_failed) { 2068 /* The controller state may be changed to failed in one of the nvme_request_check_timeout callbacks. */ 2069 return; 2070 } 2071 assert(tcp_req->req != NULL); 2072 2073 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 2074 /* 2075 * The requests are in order, so as soon as one has not timed out, 2076 * stop iterating. 2077 */ 2078 break; 2079 } 2080 } 2081 } 2082 2083 static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, 2084 struct spdk_nvme_qpair *qpair); 2085 2086 static int 2087 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 2088 { 2089 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2090 uint32_t reaped; 2091 int rc; 2092 2093 if (qpair->poll_group == NULL) { 2094 rc = spdk_sock_flush(tqpair->sock); 2095 if (rc < 0 && errno != EAGAIN) { 2096 SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair, 2097 errno, spdk_strerror(errno)); 2098 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 2099 nvme_tcp_qpair_check_timeout(qpair); 2100 } 2101 2102 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) { 2103 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) { 2104 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 2105 } 2106 2107 /* Don't return errors until the qpair gets disconnected */ 2108 return 0; 2109 } 2110 2111 goto fail; 2112 } 2113 } 2114 2115 if (max_completions == 0) { 2116 max_completions = spdk_max(tqpair->num_entries, 1); 2117 } else { 2118 max_completions = spdk_min(max_completions, tqpair->num_entries); 2119 } 2120 2121 reaped = 0; 2122 rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions); 2123 if (rc < 0) { 2124 SPDK_DEBUGLOG(nvme, "Error polling CQ! 
(%d): %s\n", 2125 errno, spdk_strerror(errno)); 2126 goto fail; 2127 } 2128 2129 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 2130 nvme_tcp_qpair_check_timeout(qpair); 2131 } 2132 2133 if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 2134 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair); 2135 if (rc != 0 && rc != -EAGAIN) { 2136 SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair); 2137 goto fail; 2138 } else if (rc == 0) { 2139 /* Once the connection is completed, we can submit queued requests */ 2140 nvme_qpair_resubmit_requests(qpair, tqpair->num_entries); 2141 } 2142 } 2143 2144 return reaped; 2145 fail: 2146 2147 /* 2148 * Since admin queues take the ctrlr_lock before entering this function, 2149 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need 2150 * to call the generic function which will take the lock for us. 2151 */ 2152 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN; 2153 2154 if (nvme_qpair_is_admin_queue(qpair)) { 2155 nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair); 2156 } else { 2157 nvme_ctrlr_disconnect_qpair(qpair); 2158 } 2159 return -ENXIO; 2160 } 2161 2162 static void 2163 nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock) 2164 { 2165 struct spdk_nvme_qpair *qpair = ctx; 2166 struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group); 2167 int32_t num_completions; 2168 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2169 2170 if (tqpair->needs_poll) { 2171 TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link); 2172 tqpair->needs_poll = false; 2173 } 2174 2175 num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair); 2176 2177 if (pgroup->num_completions >= 0 && num_completions >= 0) { 2178 pgroup->num_completions += num_completions; 2179 pgroup->stats.nvme_completions += num_completions; 2180 } else { 2181 pgroup->num_completions = -ENXIO; 2182 } 2183 } 2184 2185 static int 2186 nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair) 2187 { 2188 struct spdk_nvme_tcp_ic_req *ic_req; 2189 struct nvme_tcp_pdu *pdu; 2190 uint32_t timeout_in_sec; 2191 2192 pdu = tqpair->send_pdu; 2193 memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu)); 2194 ic_req = &pdu->hdr.ic_req; 2195 2196 ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ; 2197 ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req); 2198 ic_req->pfv = 0; 2199 ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1; 2200 ic_req->hpda = NVME_TCP_HPDA_DEFAULT; 2201 2202 ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest; 2203 ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest; 2204 2205 nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair); 2206 2207 timeout_in_sec = tqpair->qpair.async ? 
ICREQ_TIMEOUT_ASYNC : ICREQ_TIMEOUT_SYNC; 2208 tqpair->icreq_timeout_tsc = spdk_get_ticks() + (timeout_in_sec * spdk_get_ticks_hz()); 2209 return 0; 2210 } 2211 2212 static int 2213 nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2214 { 2215 struct sockaddr_storage dst_addr; 2216 struct sockaddr_storage src_addr; 2217 int rc; 2218 struct nvme_tcp_qpair *tqpair; 2219 int family; 2220 long int port, src_port; 2221 char *sock_impl_name; 2222 struct spdk_sock_impl_opts impl_opts = {}; 2223 size_t impl_opts_size = sizeof(impl_opts); 2224 struct spdk_sock_opts opts; 2225 struct nvme_tcp_ctrlr *tcp_ctrlr; 2226 2227 tqpair = nvme_tcp_qpair(qpair); 2228 2229 switch (ctrlr->trid.adrfam) { 2230 case SPDK_NVMF_ADRFAM_IPV4: 2231 family = AF_INET; 2232 break; 2233 case SPDK_NVMF_ADRFAM_IPV6: 2234 family = AF_INET6; 2235 break; 2236 default: 2237 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam); 2238 rc = -1; 2239 return rc; 2240 } 2241 2242 SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family); 2243 2244 memset(&dst_addr, 0, sizeof(dst_addr)); 2245 2246 SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid); 2247 rc = nvme_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid, &port); 2248 if (rc != 0) { 2249 SPDK_ERRLOG("dst_addr nvme_parse_addr() failed\n"); 2250 return rc; 2251 } 2252 2253 if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) { 2254 memset(&src_addr, 0, sizeof(src_addr)); 2255 rc = nvme_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid, &src_port); 2256 if (rc != 0) { 2257 SPDK_ERRLOG("src_addr nvme_parse_addr() failed\n"); 2258 return rc; 2259 } 2260 } 2261 2262 tcp_ctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 2263 sock_impl_name = tcp_ctrlr->psk[0] ? "ssl" : NULL; 2264 SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name); 2265 2266 if (sock_impl_name) { 2267 spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size); 2268 impl_opts.tls_version = SPDK_TLS_VERSION_1_3; 2269 impl_opts.psk_identity = tcp_ctrlr->psk_identity; 2270 impl_opts.psk_key = tcp_ctrlr->psk; 2271 impl_opts.psk_key_size = tcp_ctrlr->psk_size; 2272 impl_opts.tls_cipher_suites = tcp_ctrlr->tls_cipher_suite; 2273 } 2274 opts.opts_size = sizeof(opts); 2275 spdk_sock_get_default_opts(&opts); 2276 opts.priority = ctrlr->trid.priority; 2277 opts.zcopy = !nvme_qpair_is_admin_queue(qpair); 2278 if (ctrlr->opts.transport_ack_timeout) { 2279 opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout; 2280 } 2281 if (sock_impl_name) { 2282 opts.impl_opts = &impl_opts; 2283 opts.impl_opts_size = sizeof(impl_opts); 2284 } 2285 tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts); 2286 if (!tqpair->sock) { 2287 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", 2288 tqpair, ctrlr->trid.traddr, port); 2289 rc = -1; 2290 return rc; 2291 } 2292 2293 return 0; 2294 } 2295 2296 static int 2297 nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2298 { 2299 struct nvme_tcp_qpair *tqpair; 2300 int rc; 2301 2302 tqpair = nvme_tcp_qpair(qpair); 2303 2304 /* Prevent this function from being called recursively, as it could lead to issues with 2305 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive 2306 * call. 
2307 */ 2308 if (tqpair->flags.in_connect_poll) { 2309 return -EAGAIN; 2310 } 2311 2312 tqpair->flags.in_connect_poll = 1; 2313 2314 switch (tqpair->state) { 2315 case NVME_TCP_QPAIR_STATE_INVALID: 2316 case NVME_TCP_QPAIR_STATE_INITIALIZING: 2317 if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) { 2318 SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair); 2319 rc = -ETIMEDOUT; 2320 break; 2321 } 2322 rc = -EAGAIN; 2323 break; 2324 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 2325 rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1); 2326 if (rc < 0) { 2327 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); 2328 break; 2329 } 2330 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL; 2331 rc = -EAGAIN; 2332 break; 2333 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 2334 rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair); 2335 if (rc == 0) { 2336 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 2337 nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); 2338 } else if (rc != -EAGAIN) { 2339 SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n"); 2340 } 2341 break; 2342 case NVME_TCP_QPAIR_STATE_RUNNING: 2343 rc = 0; 2344 break; 2345 default: 2346 assert(false); 2347 rc = -EINVAL; 2348 break; 2349 } 2350 2351 tqpair->flags.in_connect_poll = 0; 2352 return rc; 2353 } 2354 2355 static int 2356 nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2357 { 2358 int rc = 0; 2359 struct nvme_tcp_qpair *tqpair; 2360 struct nvme_tcp_poll_group *tgroup; 2361 2362 tqpair = nvme_tcp_qpair(qpair); 2363 2364 if (!tqpair->sock) { 2365 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair); 2366 if (rc < 0) { 2367 return rc; 2368 } 2369 } 2370 2371 if (qpair->poll_group) { 2372 rc = nvme_poll_group_connect_qpair(qpair); 2373 if (rc) { 2374 SPDK_ERRLOG("Unable to activate the tcp qpair.\n"); 2375 return rc; 2376 } 2377 tgroup = nvme_tcp_poll_group(qpair->poll_group); 2378 tqpair->stats = &tgroup->stats; 2379 tqpair->shared_stats = true; 2380 } else { 2381 /* When resetting a controller, we disconnect adminq and then reconnect. The stats 2382 * is not freed when disconnecting. So when reconnecting, don't allocate memory 2383 * again. 2384 */ 2385 if (tqpair->stats == NULL) { 2386 tqpair->stats = calloc(1, sizeof(*tqpair->stats)); 2387 if (!tqpair->stats) { 2388 SPDK_ERRLOG("tcp stats memory allocation failed\n"); 2389 return -ENOMEM; 2390 } 2391 } 2392 } 2393 2394 tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT; 2395 /* Explicitly set the state and recv_state of tqpair */ 2396 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 2397 if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) { 2398 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 2399 } 2400 rc = nvme_tcp_qpair_icreq_send(tqpair); 2401 if (rc != 0) { 2402 SPDK_ERRLOG("Unable to connect the tqpair\n"); 2403 return rc; 2404 } 2405 2406 return rc; 2407 } 2408 2409 static struct spdk_nvme_qpair * 2410 nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, 2411 uint16_t qid, uint32_t qsize, 2412 enum spdk_nvme_qprio qprio, 2413 uint32_t num_requests, bool async) 2414 { 2415 struct nvme_tcp_qpair *tqpair; 2416 struct spdk_nvme_qpair *qpair; 2417 int rc; 2418 2419 if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) { 2420 SPDK_ERRLOG("Failed to create qpair with size %u. 
Minimum queue size is %d.\n",
2421 qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
2422 return NULL;
2423 }
2424
2425 tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
2426 if (!tqpair) {
2427 SPDK_ERRLOG("failed to allocate tqpair\n");
2428 return NULL;
2429 }
2430
2431 /* Set num_entries to one less than the queue size. Per the NVMe
2432 * and NVMe-oF specs, we cannot submit "queue size" requests at once;
2433 * one slot must always remain empty.
2434 */
2435 tqpair->num_entries = qsize - 1;
2436 qpair = &tqpair->qpair;
2437 rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
2438 if (rc != 0) {
2439 free(tqpair);
2440 return NULL;
2441 }
2442
2443 rc = nvme_tcp_alloc_reqs(tqpair);
2444 if (rc) {
2445 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2446 return NULL;
2447 }
2448
2449 /* spdk_nvme_qpair_get_optimal_poll_group needs socket information.
2450 * So create the socket first when creating a qpair. */
2451 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
2452 if (rc) {
2453 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2454 return NULL;
2455 }
2456
2457 return qpair;
2458 }
2459
2460 static struct spdk_nvme_qpair *
2461 nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
2462 const struct spdk_nvme_io_qpair_opts *opts)
2463 {
2464 return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
2465 opts->io_queue_requests, opts->async_mode);
2466 }
2467
2468 /* We have to use the typedef in the function declaration to appease astyle. */
2469 typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;
2470
2471 static int
2472 nvme_tcp_generate_tls_credentials(struct nvme_tcp_ctrlr *tctrlr)
2473 {
2474 int rc;
2475 uint8_t psk_retained[SPDK_TLS_PSK_MAX_LEN] = {};
2476 uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
2477 uint8_t tls_cipher_suite;
2478 uint8_t psk_retained_hash;
2479 uint64_t psk_configured_size;
2480
2481 assert(tctrlr != NULL);
2482
2483 rc = nvme_tcp_parse_interchange_psk(tctrlr->ctrlr.opts.psk, psk_configured, sizeof(psk_configured),
2484 &psk_configured_size, &psk_retained_hash);
2485 if (rc < 0) {
2486 SPDK_ERRLOG("Failed to parse PSK interchange!\n");
2487 goto finish;
2488 }
2489
2490 /* The Base64 string encodes the configured PSK (32 or 48 bytes binary).
2491 * This check also ensures that psk_configured_size is smaller than
2492 * psk_retained buffer size. */
2493 if (psk_configured_size == SHA256_DIGEST_LENGTH) {
2494 tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
2495 tctrlr->tls_cipher_suite = "TLS_AES_128_GCM_SHA256";
2496 } else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
2497 tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
2498 tctrlr->tls_cipher_suite = "TLS_AES_256_GCM_SHA384";
2499 } else {
2500 SPDK_ERRLOG("Unrecognized cipher suite!\n");
2501 rc = -ENOTSUP;
2502 goto finish;
2503 }
2504
2505 rc = nvme_tcp_generate_psk_identity(tctrlr->psk_identity, sizeof(tctrlr->psk_identity),
2506 tctrlr->ctrlr.opts.hostnqn, tctrlr->ctrlr.trid.subnqn,
2507 tls_cipher_suite);
2508 if (rc) {
2509 SPDK_ERRLOG("could not generate PSK identity\n");
2510 goto finish;
2511 }
2512
2513 /* No hash indicates that Configured PSK must be used as Retained PSK. */
2514 if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
2515 assert(psk_configured_size < sizeof(psk_retained));
2516 memcpy(psk_retained, psk_configured, psk_configured_size);
2517 rc = psk_configured_size;
2518 } else {
2519 /* Derive retained PSK. 
*/ 2520 rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, tctrlr->ctrlr.opts.hostnqn, 2521 psk_retained, sizeof(psk_retained), psk_retained_hash); 2522 if (rc < 0) { 2523 SPDK_ERRLOG("Unable to derive retained PSK!\n"); 2524 goto finish; 2525 } 2526 } 2527 2528 rc = nvme_tcp_derive_tls_psk(psk_retained, rc, tctrlr->psk_identity, tctrlr->psk, 2529 sizeof(tctrlr->psk), tls_cipher_suite); 2530 if (rc < 0) { 2531 SPDK_ERRLOG("Could not generate TLS PSK!\n"); 2532 return rc; 2533 } 2534 2535 tctrlr->psk_size = rc; 2536 rc = 0; 2537 2538 finish: 2539 spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured)); 2540 2541 return rc; 2542 } 2543 2544 static spdk_nvme_ctrlr_t * 2545 nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid, 2546 const struct spdk_nvme_ctrlr_opts *opts, 2547 void *devhandle) 2548 { 2549 struct nvme_tcp_ctrlr *tctrlr; 2550 int rc; 2551 2552 tctrlr = calloc(1, sizeof(*tctrlr)); 2553 if (tctrlr == NULL) { 2554 SPDK_ERRLOG("could not allocate ctrlr\n"); 2555 return NULL; 2556 } 2557 2558 tctrlr->ctrlr.opts = *opts; 2559 tctrlr->ctrlr.trid = *trid; 2560 2561 if (opts->psk[0] != '\0') { 2562 rc = nvme_tcp_generate_tls_credentials(tctrlr); 2563 spdk_memset_s(&tctrlr->ctrlr.opts.psk, sizeof(tctrlr->ctrlr.opts.psk), 0, 2564 sizeof(tctrlr->ctrlr.opts.psk)); 2565 2566 if (rc != 0) { 2567 free(tctrlr); 2568 return NULL; 2569 } 2570 } 2571 2572 if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) { 2573 SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n", 2574 NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT); 2575 tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT; 2576 } 2577 2578 rc = nvme_ctrlr_construct(&tctrlr->ctrlr); 2579 if (rc != 0) { 2580 free(tctrlr); 2581 return NULL; 2582 } 2583 2584 /* Only advertise support for accel sequences if data digest is enabled, otherwise it 2585 * doesn't provide any benefits to finish the sequences here */ 2586 if (opts->data_digest) { 2587 tctrlr->ctrlr.flags |= SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED; 2588 } 2589 2590 tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0, 2591 tctrlr->ctrlr.opts.admin_queue_size, 0, 2592 tctrlr->ctrlr.opts.admin_queue_size, true); 2593 if (!tctrlr->ctrlr.adminq) { 2594 SPDK_ERRLOG("failed to create admin qpair\n"); 2595 nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr); 2596 return NULL; 2597 } 2598 2599 if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) { 2600 SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n"); 2601 nvme_ctrlr_destruct(&tctrlr->ctrlr); 2602 return NULL; 2603 } 2604 2605 return &tctrlr->ctrlr; 2606 } 2607 2608 static uint32_t 2609 nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) 2610 { 2611 /* TCP transport doesn't limit maximum IO transfer size. 
*/ 2612 return UINT32_MAX; 2613 } 2614 2615 static uint16_t 2616 nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) 2617 { 2618 return NVME_TCP_MAX_SGL_DESCRIPTORS; 2619 } 2620 2621 static int 2622 nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 2623 int (*iter_fn)(struct nvme_request *req, void *arg), 2624 void *arg) 2625 { 2626 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2627 struct nvme_tcp_req *tcp_req, *tmp; 2628 int rc; 2629 2630 assert(iter_fn != NULL); 2631 2632 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2633 assert(tcp_req->req != NULL); 2634 2635 rc = iter_fn(tcp_req->req, arg); 2636 if (rc != 0) { 2637 return rc; 2638 } 2639 } 2640 2641 return 0; 2642 } 2643 2644 static void 2645 nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 2646 { 2647 struct nvme_tcp_req *tcp_req, *tmp; 2648 struct spdk_nvme_cpl cpl = {}; 2649 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2650 2651 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 2652 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2653 2654 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2655 assert(tcp_req->req != NULL); 2656 if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 2657 continue; 2658 } 2659 2660 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false); 2661 } 2662 } 2663 2664 static struct spdk_nvme_transport_poll_group * 2665 nvme_tcp_poll_group_create(void) 2666 { 2667 struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group)); 2668 2669 if (group == NULL) { 2670 SPDK_ERRLOG("Unable to allocate poll group.\n"); 2671 return NULL; 2672 } 2673 2674 TAILQ_INIT(&group->needs_poll); 2675 2676 group->sock_group = spdk_sock_group_create(group); 2677 if (group->sock_group == NULL) { 2678 free(group); 2679 SPDK_ERRLOG("Unable to allocate sock group.\n"); 2680 return NULL; 2681 } 2682 2683 return &group->group; 2684 } 2685 2686 static struct spdk_nvme_transport_poll_group * 2687 nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair) 2688 { 2689 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2690 struct spdk_sock_group *group = NULL; 2691 int rc; 2692 2693 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL); 2694 if (!rc && group != NULL) { 2695 return spdk_sock_group_get_ctx(group); 2696 } 2697 2698 return NULL; 2699 } 2700 2701 static int 2702 nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 2703 { 2704 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2705 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2706 2707 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2708 return -EPROTO; 2709 } 2710 return 0; 2711 } 2712 2713 static int 2714 nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) 2715 { 2716 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2717 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2718 2719 if (tqpair->needs_poll) { 2720 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2721 tqpair->needs_poll = false; 2722 } 2723 2724 if (tqpair->sock && group->sock_group) { 2725 if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) { 2726 return -EPROTO; 2727 } 2728 } 2729 return 0; 2730 } 2731 2732 static int 2733 nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, 2734 struct spdk_nvme_qpair *qpair) 2735 { 2736 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2737 struct nvme_tcp_poll_group *group = 
nvme_tcp_poll_group(tgroup); 2738 2739 /* disconnected qpairs won't have a sock to add. */ 2740 if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) { 2741 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2742 return -EPROTO; 2743 } 2744 } 2745 2746 return 0; 2747 } 2748 2749 static int 2750 nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, 2751 struct spdk_nvme_qpair *qpair) 2752 { 2753 struct nvme_tcp_qpair *tqpair; 2754 struct nvme_tcp_poll_group *group; 2755 2756 assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs); 2757 2758 tqpair = nvme_tcp_qpair(qpair); 2759 group = nvme_tcp_poll_group(tgroup); 2760 2761 assert(tqpair->shared_stats == true); 2762 tqpair->stats = &g_dummy_stats; 2763 2764 if (tqpair->needs_poll) { 2765 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2766 tqpair->needs_poll = false; 2767 } 2768 2769 return 0; 2770 } 2771 2772 static int64_t 2773 nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, 2774 uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) 2775 { 2776 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2777 struct spdk_nvme_qpair *qpair, *tmp_qpair; 2778 struct nvme_tcp_qpair *tqpair, *tmp_tqpair; 2779 int num_events; 2780 2781 group->completions_per_qpair = completions_per_qpair; 2782 group->num_completions = 0; 2783 group->stats.polls++; 2784 2785 num_events = spdk_sock_group_poll(group->sock_group); 2786 2787 STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { 2788 tqpair = nvme_tcp_qpair(qpair); 2789 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) { 2790 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) { 2791 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 2792 } 2793 } 2794 /* Wait until the qpair transitions to the DISCONNECTED state, otherwise user might 2795 * want to free it from disconnect_qpair_cb, while it's not fully disconnected (and 2796 * might still have outstanding requests) */ 2797 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED) { 2798 disconnected_qpair_cb(qpair, tgroup->group->ctx); 2799 } 2800 } 2801 2802 /* If any qpairs were marked as needing to be polled due to an asynchronous write completion 2803 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. 
*/ 2804 TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) { 2805 nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock); 2806 } 2807 2808 if (spdk_unlikely(num_events < 0)) { 2809 return num_events; 2810 } 2811 2812 group->stats.idle_polls += !num_events; 2813 group->stats.socket_completions += num_events; 2814 2815 return group->num_completions; 2816 } 2817 2818 static int 2819 nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) 2820 { 2821 int rc; 2822 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2823 2824 if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { 2825 return -EBUSY; 2826 } 2827 2828 rc = spdk_sock_group_close(&group->sock_group); 2829 if (rc != 0) { 2830 SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n"); 2831 assert(false); 2832 } 2833 2834 free(tgroup); 2835 2836 return 0; 2837 } 2838 2839 static int 2840 nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup, 2841 struct spdk_nvme_transport_poll_group_stat **_stats) 2842 { 2843 struct nvme_tcp_poll_group *group; 2844 struct spdk_nvme_transport_poll_group_stat *stats; 2845 2846 if (tgroup == NULL || _stats == NULL) { 2847 SPDK_ERRLOG("Invalid stats or group pointer\n"); 2848 return -EINVAL; 2849 } 2850 2851 group = nvme_tcp_poll_group(tgroup); 2852 2853 stats = calloc(1, sizeof(*stats)); 2854 if (!stats) { 2855 SPDK_ERRLOG("Can't allocate memory for TCP stats\n"); 2856 return -ENOMEM; 2857 } 2858 stats->trtype = SPDK_NVME_TRANSPORT_TCP; 2859 memcpy(&stats->tcp, &group->stats, sizeof(group->stats)); 2860 2861 *_stats = stats; 2862 2863 return 0; 2864 } 2865 2866 static void 2867 nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup, 2868 struct spdk_nvme_transport_poll_group_stat *stats) 2869 { 2870 free(stats); 2871 } 2872 2873 const struct spdk_nvme_transport_ops tcp_ops = { 2874 .name = "TCP", 2875 .type = SPDK_NVME_TRANSPORT_TCP, 2876 .ctrlr_construct = nvme_tcp_ctrlr_construct, 2877 .ctrlr_scan = nvme_fabric_ctrlr_scan, 2878 .ctrlr_destruct = nvme_tcp_ctrlr_destruct, 2879 .ctrlr_enable = nvme_tcp_ctrlr_enable, 2880 2881 .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4, 2882 .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8, 2883 .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4, 2884 .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8, 2885 .ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async, 2886 .ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async, 2887 .ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async, 2888 .ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async, 2889 2890 .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size, 2891 .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges, 2892 2893 .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair, 2894 .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair, 2895 .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair, 2896 .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair, 2897 2898 .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs, 2899 .qpair_reset = nvme_tcp_qpair_reset, 2900 .qpair_submit_request = nvme_tcp_qpair_submit_request, 2901 .qpair_process_completions = nvme_tcp_qpair_process_completions, 2902 .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests, 2903 .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers, 2904 2905 .poll_group_create = nvme_tcp_poll_group_create, 2906 .qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group, 2907 
.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair, 2908 .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair, 2909 .poll_group_add = nvme_tcp_poll_group_add, 2910 .poll_group_remove = nvme_tcp_poll_group_remove, 2911 .poll_group_process_completions = nvme_tcp_poll_group_process_completions, 2912 .poll_group_destroy = nvme_tcp_poll_group_destroy, 2913 .poll_group_get_stats = nvme_tcp_poll_group_get_stats, 2914 .poll_group_free_stats = nvme_tcp_poll_group_free_stats, 2915 }; 2916 2917 SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops); 2918 2919 SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP) 2920 { 2921 struct spdk_trace_tpoint_opts opts[] = { 2922 { 2923 "NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT, 2924 OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1, 2925 { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }, 2926 { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 }, 2927 { "opc", SPDK_TRACE_ARG_TYPE_INT, 4 }, 2928 { "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 2929 { "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 2930 { "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 } 2931 } 2932 }, 2933 { 2934 "NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE, 2935 OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0, 2936 { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }, 2937 { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 }, 2938 { "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 } 2939 } 2940 }, 2941 }; 2942 2943 spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p'); 2944 spdk_trace_register_owner(OWNER_NVME_TCP_QP, 'q'); 2945 spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts)); 2946 } 2947
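/* Minimal usage sketch (illustration only; error handling is omitted and the
 * target address, NQN and helper name below are hypothetical).  Connecting to
 * an NVMe/TCP subsystem through the public API exercises the callbacks
 * registered in tcp_ops above: spdk_nvme_connect() lands in
 * nvme_tcp_ctrlr_construct(), and I/O queue creation goes through
 * nvme_tcp_ctrlr_create_io_qpair().
 *
 *   #include "spdk/env.h"
 *   #include "spdk/nvme.h"
 *
 *   static void
 *   example_connect_tcp(void)
 *   {
 *           struct spdk_nvme_transport_id trid = {};
 *           struct spdk_nvme_ctrlr *ctrlr;
 *           struct spdk_nvme_qpair *qpair;
 *
 *           // spdk_env_init() must have been called by the application first.
 *           spdk_nvme_transport_id_parse(&trid,
 *                   "trtype:TCP adrfam:IPv4 traddr:127.0.0.1 trsvcid:4420 "
 *                   "subnqn:nqn.2016-06.io.spdk:cnode1");
 *
 *           ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *           qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
 *
 *           // Submit I/O with spdk_nvme_ns_cmd_read()/write(), then reap
 *           // completions; this ends up in nvme_tcp_qpair_process_completions().
 *           spdk_nvme_qpair_process_completions(qpair, 0);
 *   }
 */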