1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. All rights reserved. 3 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 /* 8 * NVMe/TCP transport 9 */ 10 11 #include "nvme_internal.h" 12 13 #include "spdk/endian.h" 14 #include "spdk/likely.h" 15 #include "spdk/string.h" 16 #include "spdk/stdinc.h" 17 #include "spdk/crc32.h" 18 #include "spdk/endian.h" 19 #include "spdk/assert.h" 20 #include "spdk/string.h" 21 #include "spdk/trace.h" 22 #include "spdk/util.h" 23 #include "spdk/nvmf.h" 24 25 #include "spdk_internal/nvme_tcp.h" 26 #include "spdk_internal/trace_defs.h" 27 28 #define NVME_TCP_RW_BUFFER_SIZE 131072 29 30 /* For async connect workloads, allow more time since we are more likely 31 * to be processing lots ICREQs at once. 32 */ 33 #define ICREQ_TIMEOUT_SYNC 2 /* in seconds */ 34 #define ICREQ_TIMEOUT_ASYNC 10 /* in seconds */ 35 36 #define NVME_TCP_HPDA_DEFAULT 0 37 #define NVME_TCP_MAX_R2T_DEFAULT 1 38 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 39 40 /* 41 * Maximum value of transport_ack_timeout used by TCP controller 42 */ 43 #define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31 44 45 46 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 47 struct nvme_tcp_ctrlr { 48 struct spdk_nvme_ctrlr ctrlr; 49 char psk_identity[NVMF_PSK_IDENTITY_LEN]; 50 uint8_t psk[SPDK_TLS_PSK_MAX_LEN]; 51 int psk_size; 52 char *tls_cipher_suite; 53 }; 54 55 struct nvme_tcp_poll_group { 56 struct spdk_nvme_transport_poll_group group; 57 struct spdk_sock_group *sock_group; 58 uint32_t completions_per_qpair; 59 int64_t num_completions; 60 61 TAILQ_HEAD(, nvme_tcp_qpair) needs_poll; 62 struct spdk_nvme_tcp_stat stats; 63 }; 64 65 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 66 struct nvme_tcp_qpair { 67 struct spdk_nvme_qpair qpair; 68 struct spdk_sock *sock; 69 70 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 71 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 72 73 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 74 struct nvme_tcp_pdu *recv_pdu; 75 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 76 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 77 enum nvme_tcp_pdu_recv_state recv_state; 78 struct nvme_tcp_req *tcp_reqs; 79 struct spdk_nvme_tcp_stat *stats; 80 81 uint16_t num_entries; 82 uint16_t async_complete; 83 84 struct { 85 uint16_t host_hdgst_enable: 1; 86 uint16_t host_ddgst_enable: 1; 87 uint16_t icreq_send_ack: 1; 88 uint16_t in_connect_poll: 1; 89 uint16_t reserved: 12; 90 } flags; 91 92 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 93 uint32_t maxh2cdata; 94 95 uint32_t maxr2t; 96 97 /* 0 based value, which is used to guide the padding */ 98 uint8_t cpda; 99 100 enum nvme_tcp_qpair_state state; 101 102 TAILQ_ENTRY(nvme_tcp_qpair) link; 103 bool needs_poll; 104 105 uint64_t icreq_timeout_tsc; 106 107 bool shared_stats; 108 }; 109 110 enum nvme_tcp_req_state { 111 NVME_TCP_REQ_FREE, 112 NVME_TCP_REQ_ACTIVE, 113 NVME_TCP_REQ_ACTIVE_R2T, 114 }; 115 116 struct nvme_tcp_req { 117 struct nvme_request *req; 118 enum nvme_tcp_req_state state; 119 uint16_t cid; 120 uint16_t ttag; 121 uint32_t datao; 122 uint32_t expected_datao; 123 uint32_t r2tl_remain; 124 uint32_t active_r2ts; 125 /* Used to hold a value received from subsequent R2T while we are still 126 * waiting for H2C complete */ 127 uint16_t ttag_r2t_next; 128 bool in_capsule_data; 129 /* It is used to track whether the req can be safely freed */ 
130 union { 131 uint8_t raw; 132 struct { 133 /* The last send operation completed - kernel released send buffer */ 134 uint8_t send_ack : 1; 135 /* Data transfer completed - target send resp or last data bit */ 136 uint8_t data_recv : 1; 137 /* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification 138 * from kernel) to send H2C */ 139 uint8_t h2c_send_waiting_ack : 1; 140 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 141 * Rare case, actual when dealing with target that can send several R2T requests. 142 * SPDK TCP target sends 1 R2T for the whole data buffer */ 143 uint8_t r2t_waiting_h2c_complete : 1; 144 /* Accel operation is in progress */ 145 uint8_t in_progress_accel : 1; 146 uint8_t reserved : 3; 147 } bits; 148 } ordering; 149 struct nvme_tcp_pdu *pdu; 150 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 151 uint32_t iovcnt; 152 /* Used to hold a value received from subsequent R2T while we are still 153 * waiting for H2C ack */ 154 uint32_t r2tl_remain_next; 155 struct nvme_tcp_qpair *tqpair; 156 TAILQ_ENTRY(nvme_tcp_req) link; 157 struct spdk_nvme_cpl rsp; 158 }; 159 160 static struct spdk_nvme_tcp_stat g_dummy_stats = {}; 161 162 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 163 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 164 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 165 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 166 static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair, 167 struct spdk_nvme_cpl *rsp, bool print_on_error); 168 169 static inline struct nvme_tcp_qpair * 170 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 171 { 172 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 173 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 174 } 175 176 static inline struct nvme_tcp_poll_group * 177 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 178 { 179 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 180 } 181 182 static inline struct nvme_tcp_ctrlr * 183 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 184 { 185 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 186 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 187 } 188 189 static struct nvme_tcp_req * 190 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 191 { 192 struct nvme_tcp_req *tcp_req; 193 194 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 195 if (!tcp_req) { 196 return NULL; 197 } 198 199 assert(tcp_req->state == NVME_TCP_REQ_FREE); 200 tcp_req->state = NVME_TCP_REQ_ACTIVE; 201 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 202 tcp_req->datao = 0; 203 tcp_req->expected_datao = 0; 204 tcp_req->req = NULL; 205 tcp_req->in_capsule_data = false; 206 tcp_req->r2tl_remain = 0; 207 tcp_req->r2tl_remain_next = 0; 208 tcp_req->active_r2ts = 0; 209 tcp_req->iovcnt = 0; 210 tcp_req->ordering.raw = 0; 211 memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu)); 212 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 213 214 return tcp_req; 215 } 216 217 static void 218 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 219 { 220 assert(tcp_req->state != NVME_TCP_REQ_FREE); 221 tcp_req->state = NVME_TCP_REQ_FREE; 222 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 223 } 224 225 static inline void 226 nvme_tcp_accel_submit_crc32c(struct nvme_tcp_poll_group *tgroup, 
struct nvme_tcp_req *treq, 227 uint32_t *dst, struct iovec *iovs, uint32_t iovcnt, uint32_t seed, 228 spdk_nvme_accel_completion_cb cb_fn, void *cb_arg) 229 { 230 struct spdk_nvme_poll_group *pg = tgroup->group.group; 231 232 treq->ordering.bits.in_progress_accel = 1; 233 pg->accel_fn_table.submit_accel_crc32c(pg->ctx, dst, iovs, iovcnt, seed, cb_fn, cb_arg); 234 } 235 236 static inline void 237 nvme_tcp_accel_finish_sequence(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq, 238 void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg) 239 { 240 struct spdk_nvme_poll_group *pg = tgroup->group.group; 241 242 treq->ordering.bits.in_progress_accel = 1; 243 pg->accel_fn_table.finish_sequence(seq, cb_fn, cb_arg); 244 } 245 246 static inline void 247 nvme_tcp_accel_reverse_sequence(struct nvme_tcp_poll_group *tgroup, void *seq) 248 { 249 struct spdk_nvme_poll_group *pg = tgroup->group.group; 250 251 pg->accel_fn_table.reverse_sequence(seq); 252 } 253 254 static inline int 255 nvme_tcp_accel_append_crc32c(struct nvme_tcp_poll_group *tgroup, void **seq, uint32_t *dst, 256 struct iovec *iovs, uint32_t iovcnt, uint32_t seed, 257 spdk_nvme_accel_step_cb cb_fn, void *cb_arg) 258 { 259 struct spdk_nvme_poll_group *pg = tgroup->group.group; 260 261 return pg->accel_fn_table.append_crc32c(pg->ctx, seq, dst, iovs, iovcnt, NULL, NULL, 262 seed, cb_fn, cb_arg); 263 } 264 265 static int 266 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 267 { 268 struct addrinfo *res; 269 struct addrinfo hints; 270 int ret; 271 272 memset(&hints, 0, sizeof(hints)); 273 hints.ai_family = family; 274 hints.ai_socktype = SOCK_STREAM; 275 hints.ai_protocol = 0; 276 277 ret = getaddrinfo(addr, service, &hints, &res); 278 if (ret) { 279 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 280 return -(abs(ret)); 281 } 282 283 if (res->ai_addrlen > sizeof(*sa)) { 284 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 285 ret = -EINVAL; 286 } else { 287 memcpy(sa, res->ai_addr, res->ai_addrlen); 288 } 289 290 freeaddrinfo(res); 291 return ret; 292 } 293 294 static void 295 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 296 { 297 free(tqpair->tcp_reqs); 298 tqpair->tcp_reqs = NULL; 299 300 spdk_free(tqpair->send_pdus); 301 tqpair->send_pdus = NULL; 302 } 303 304 static int 305 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 306 { 307 uint16_t i; 308 struct nvme_tcp_req *tcp_req; 309 310 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 311 if (tqpair->tcp_reqs == NULL) { 312 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 313 goto fail; 314 } 315 316 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 317 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 318 0x1000, NULL, 319 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 320 321 if (tqpair->send_pdus == NULL) { 322 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 323 goto fail; 324 } 325 326 TAILQ_INIT(&tqpair->send_queue); 327 TAILQ_INIT(&tqpair->free_reqs); 328 TAILQ_INIT(&tqpair->outstanding_reqs); 329 for (i = 0; i < tqpair->num_entries; i++) { 330 tcp_req = &tqpair->tcp_reqs[i]; 331 tcp_req->cid = i; 332 tcp_req->tqpair = tqpair; 333 tcp_req->pdu = &tqpair->send_pdus[i]; 334 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 335 } 336 337 tqpair->send_pdu = &tqpair->send_pdus[i]; 338 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 
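/* At this point i == num_entries, so the PDU array layout is: [0 .. num_entries - 1] for the per-request PDUs assigned above, [num_entries] for the qpair's own send_pdu (ICReq and term/error PDUs) and [num_entries + 1] for the recv_pdu. */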
339 340 return 0; 341 fail: 342 nvme_tcp_free_reqs(tqpair); 343 return -ENOMEM; 344 } 345 346 static inline void 347 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 348 enum nvme_tcp_pdu_recv_state state) 349 { 350 if (tqpair->recv_state == state) { 351 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 352 tqpair, state); 353 return; 354 } 355 356 if (state == NVME_TCP_PDU_RECV_STATE_ERROR) { 357 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 358 } 359 360 tqpair->recv_state = state; 361 } 362 363 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 364 365 static void 366 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 367 { 368 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 369 struct nvme_tcp_pdu *pdu; 370 int rc; 371 struct nvme_tcp_poll_group *group; 372 373 if (tqpair->needs_poll) { 374 group = nvme_tcp_poll_group(qpair->poll_group); 375 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 376 tqpair->needs_poll = false; 377 } 378 379 rc = spdk_sock_close(&tqpair->sock); 380 381 if (tqpair->sock != NULL) { 382 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 383 /* Set it to NULL manually */ 384 tqpair->sock = NULL; 385 } 386 387 /* clear the send_queue */ 388 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 389 pdu = TAILQ_FIRST(&tqpair->send_queue); 390 /* Remove the pdu from the send_queue to prevent the wrong sending out 391 * in the next round connection 392 */ 393 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 394 } 395 396 nvme_tcp_qpair_abort_reqs(qpair, 0); 397 398 /* If the qpair is marked as asynchronous, let it go through the process_completions() to 399 * let any outstanding requests (e.g. those with outstanding accel operations) complete. 400 * Otherwise, there's no way of waiting for them, so tqpair->outstanding_reqs has to be 401 * empty. 402 */ 403 if (qpair->async) { 404 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 405 } else { 406 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 407 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 408 } 409 } 410 411 static int 412 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 413 { 414 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 415 416 assert(qpair != NULL); 417 nvme_tcp_qpair_abort_reqs(qpair, 0); 418 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 419 420 nvme_qpair_deinit(qpair); 421 nvme_tcp_free_reqs(tqpair); 422 if (!tqpair->shared_stats) { 423 free(tqpair->stats); 424 } 425 free(tqpair); 426 427 return 0; 428 } 429 430 static int 431 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 432 { 433 return 0; 434 } 435 436 static int 437 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 438 { 439 struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr); 440 441 if (ctrlr->adminq) { 442 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 443 } 444 445 nvme_ctrlr_destruct_finish(ctrlr); 446 447 free(tctrlr); 448 449 return 0; 450 } 451 452 static void 453 pdu_write_done(void *cb_arg, int err) 454 { 455 struct nvme_tcp_pdu *pdu = cb_arg; 456 struct nvme_tcp_qpair *tqpair = pdu->qpair; 457 struct nvme_tcp_poll_group *pgroup; 458 459 /* If there are queued requests, we assume they are queued because they are waiting 460 * for resources to be released. Those resources are almost certainly released in 461 * response to a PDU completing here. 
However, to attempt to make forward progress 462 * the qpair needs to be polled and we can't rely on another network event to make 463 * that happen. Add it to a list of qpairs to poll regardless of network activity 464 * here. 465 * Besides, when tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or 466 * NVME_TCP_QPAIR_STATE_INITIALIZING, need to add it to needs_poll list too to make 467 * forward progress in case that the resources are released after icreq's or CONNECT's 468 * resp is processed. */ 469 if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) || 470 tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL || 471 tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 472 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 473 474 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 475 tqpair->needs_poll = true; 476 } 477 478 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 479 480 if (err != 0) { 481 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 482 return; 483 } 484 485 assert(pdu->cb_fn != NULL); 486 pdu->cb_fn(pdu->cb_arg); 487 } 488 489 static void 490 pdu_write_fail(struct nvme_tcp_pdu *pdu, int status) 491 { 492 struct nvme_tcp_qpair *tqpair = pdu->qpair; 493 494 /* This function is similar to pdu_write_done(), but it should be called before a PDU is 495 * sent over the socket */ 496 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 497 pdu_write_done(pdu, status); 498 } 499 500 static void 501 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 502 { 503 uint32_t mapped_length = 0; 504 struct nvme_tcp_qpair *tqpair = pdu->qpair; 505 506 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, 507 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 508 &mapped_length); 509 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 510 if (spdk_unlikely(mapped_length < pdu->data_len)) { 511 SPDK_ERRLOG("could not map the whole %u bytes (mapped only %u bytes)\n", pdu->data_len, 512 mapped_length); 513 pdu_write_done(pdu, -EINVAL); 514 return; 515 } 516 pdu->sock_req.cb_fn = pdu_write_done; 517 pdu->sock_req.cb_arg = pdu; 518 tqpair->stats->submitted_requests++; 519 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 520 } 521 522 static void 523 tcp_write_pdu_seq_cb(void *ctx, int status) 524 { 525 struct nvme_tcp_pdu *pdu = ctx; 526 struct nvme_tcp_req *treq = pdu->req; 527 struct nvme_request *req = treq->req; 528 529 assert(treq->ordering.bits.in_progress_accel); 530 treq->ordering.bits.in_progress_accel = 0; 531 532 req->accel_sequence = NULL; 533 if (spdk_unlikely(status != 0)) { 534 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 535 pdu_write_fail(pdu, status); 536 return; 537 } 538 539 _tcp_write_pdu(pdu); 540 } 541 542 static void 543 tcp_write_pdu(struct nvme_tcp_pdu *pdu) 544 { 545 struct nvme_tcp_req *treq = pdu->req; 546 struct nvme_tcp_qpair *tqpair = pdu->qpair; 547 struct nvme_tcp_poll_group *tgroup; 548 struct nvme_request *req; 549 550 if (spdk_likely(treq != NULL)) { 551 req = treq->req; 552 if (req->accel_sequence != NULL && 553 spdk_nvme_opc_get_data_transfer(req->cmd.opc) == SPDK_NVME_DATA_HOST_TO_CONTROLLER && 554 pdu->data_len > 0) { 555 assert(tqpair->qpair.poll_group != NULL); 556 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 557 nvme_tcp_accel_finish_sequence(tgroup, treq, req->accel_sequence, 558 tcp_write_pdu_seq_cb, pdu); 559 return; 560 } 561 } 562 563 _tcp_write_pdu(pdu); 564 } 565 566 static 
void 567 pdu_accel_compute_crc32_done(void *cb_arg, int status) 568 { 569 struct nvme_tcp_pdu *pdu = cb_arg; 570 struct nvme_tcp_req *req = pdu->req; 571 572 assert(req->ordering.bits.in_progress_accel); 573 req->ordering.bits.in_progress_accel = 0; 574 575 if (spdk_unlikely(status)) { 576 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 577 pdu_write_fail(pdu, status); 578 return; 579 } 580 581 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 582 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 583 584 _tcp_write_pdu(pdu); 585 } 586 587 static void 588 pdu_accel_compute_crc32_seq_cb(void *cb_arg, int status) 589 { 590 struct nvme_tcp_pdu *pdu = cb_arg; 591 struct nvme_tcp_qpair *tqpair = pdu->qpair; 592 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 593 struct nvme_tcp_req *treq = pdu->req; 594 struct nvme_request *req = treq->req; 595 596 assert(treq->ordering.bits.in_progress_accel); 597 treq->ordering.bits.in_progress_accel = 0; 598 599 req->accel_sequence = NULL; 600 if (spdk_unlikely(status != 0)) { 601 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 602 pdu_write_fail(pdu, status); 603 return; 604 } 605 606 nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32, 607 pdu->data_iov, pdu->data_iovcnt, 0, 608 pdu_accel_compute_crc32_done, pdu); 609 } 610 611 static void 612 pdu_accel_seq_compute_crc32_done(void *cb_arg) 613 { 614 struct nvme_tcp_pdu *pdu = cb_arg; 615 616 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 617 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 618 } 619 620 static bool 621 pdu_accel_compute_crc32(struct nvme_tcp_pdu *pdu) 622 { 623 struct nvme_tcp_qpair *tqpair = pdu->qpair; 624 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 625 struct nvme_request *req = ((struct nvme_tcp_req *)pdu->req)->req; 626 int rc; 627 628 /* Only support this limited case for the first step */ 629 if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED || 630 pdu->dif_ctx != NULL || 631 pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0)) { 632 return false; 633 } 634 635 if (tqpair->qpair.poll_group == NULL) { 636 return false; 637 } 638 639 if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) { 640 rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence, 641 &pdu->data_digest_crc32, 642 pdu->data_iov, pdu->data_iovcnt, 0, 643 pdu_accel_seq_compute_crc32_done, pdu); 644 if (spdk_unlikely(rc != 0)) { 645 /* If accel is out of resources, fall back to non-accelerated crc32 */ 646 if (rc == -ENOMEM) { 647 return false; 648 } 649 650 SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc); 651 pdu_write_fail(pdu, rc); 652 return true; 653 } 654 655 tcp_write_pdu(pdu); 656 return true; 657 } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) { 658 if (req->accel_sequence != NULL) { 659 nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence, 660 pdu_accel_compute_crc32_seq_cb, pdu); 661 } else { 662 nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32, 663 pdu->data_iov, pdu->data_iovcnt, 0, 664 pdu_accel_compute_crc32_done, pdu); 665 } 666 667 return true; 668 } 669 670 return false; 671 } 672 673 static void 674 pdu_compute_crc32_seq_cb(void *cb_arg, int status) 675 { 676 struct nvme_tcp_pdu *pdu = cb_arg; 677 struct nvme_tcp_req *treq = pdu->req; 678 struct nvme_request *req = treq->req; 679 uint32_t crc32c; 680 681 
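/* Accel sequence callback for a host-to-controller PDU whose data digest is computed in software: clear the accel-in-progress flag, then calculate the CRC32C data digest and send the PDU (or fail the write if the sequence itself failed). */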
assert(treq->ordering.bits.in_progress_accel); 682 treq->ordering.bits.in_progress_accel = 0; 683 684 req->accel_sequence = NULL; 685 if (spdk_unlikely(status != 0)) { 686 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 687 pdu_write_fail(pdu, status); 688 return; 689 } 690 691 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 692 crc32c = crc32c ^ SPDK_CRC32C_XOR; 693 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 694 695 _tcp_write_pdu(pdu); 696 } 697 698 static void 699 pdu_compute_crc32(struct nvme_tcp_pdu *pdu) 700 { 701 struct nvme_tcp_qpair *tqpair = pdu->qpair; 702 struct nvme_tcp_poll_group *tgroup; 703 struct nvme_request *req; 704 uint32_t crc32c; 705 706 /* Data Digest */ 707 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 708 tqpair->flags.host_ddgst_enable) { 709 if (pdu_accel_compute_crc32(pdu)) { 710 return; 711 } 712 713 req = ((struct nvme_tcp_req *)pdu->req)->req; 714 if (req->accel_sequence != NULL) { 715 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 716 nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence, 717 pdu_compute_crc32_seq_cb, pdu); 718 return; 719 } 720 721 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 722 crc32c = crc32c ^ SPDK_CRC32C_XOR; 723 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 724 } 725 726 tcp_write_pdu(pdu); 727 } 728 729 static int 730 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 731 struct nvme_tcp_pdu *pdu, 732 nvme_tcp_qpair_xfer_complete_cb cb_fn, 733 void *cb_arg) 734 { 735 int hlen; 736 uint32_t crc32c; 737 738 hlen = pdu->hdr.common.hlen; 739 pdu->cb_fn = cb_fn; 740 pdu->cb_arg = cb_arg; 741 pdu->qpair = tqpair; 742 743 /* Header Digest */ 744 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 745 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 746 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 747 } 748 749 pdu_compute_crc32(pdu); 750 751 return 0; 752 } 753 754 /* 755 * Build SGL describing contiguous payload buffer. 756 */ 757 static int 758 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 759 { 760 struct nvme_request *req = tcp_req->req; 761 762 /* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL, 763 * so just double cast it to make it go away */ 764 tcp_req->iov[0].iov_base = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset); 765 tcp_req->iov[0].iov_len = req->payload_size; 766 tcp_req->iovcnt = 1; 767 768 SPDK_DEBUGLOG(nvme, "enter\n"); 769 770 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 771 772 return 0; 773 } 774 775 /* 776 * Build SGL describing scattered payload buffer. 
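* The request's reset_sgl/next_sge callbacks are walked to fill tcp_req->iov, using at most min(ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS) entries; if the payload does not fit into that many SGEs, the request is failed.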
777 */ 778 static int 779 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 780 { 781 int rc; 782 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 783 struct nvme_request *req = tcp_req->req; 784 785 SPDK_DEBUGLOG(nvme, "enter\n"); 786 787 assert(req->payload_size != 0); 788 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 789 assert(req->payload.reset_sgl_fn != NULL); 790 assert(req->payload.next_sge_fn != NULL); 791 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 792 793 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 794 remaining_size = req->payload_size; 795 796 do { 797 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 798 &length); 799 if (rc) { 800 return -1; 801 } 802 803 length = spdk_min(length, remaining_size); 804 tcp_req->iov[iovcnt].iov_len = length; 805 remaining_size -= length; 806 iovcnt++; 807 } while (remaining_size > 0 && iovcnt < max_num_sgl); 808 809 810 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */ 811 if (remaining_size > 0) { 812 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 813 tcp_req, iovcnt, remaining_size); 814 return -1; 815 } 816 817 tcp_req->iovcnt = iovcnt; 818 819 return 0; 820 } 821 822 static int 823 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 824 struct nvme_tcp_req *tcp_req) 825 { 826 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 827 int rc = 0; 828 enum spdk_nvme_data_transfer xfer; 829 uint32_t max_in_capsule_data_size; 830 831 tcp_req->req = req; 832 req->cmd.cid = tcp_req->cid; 833 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 834 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 835 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 836 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 837 838 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 839 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 840 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 841 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 842 } else { 843 rc = -1; 844 } 845 846 if (rc) { 847 return rc; 848 } 849 850 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 851 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 852 853 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 854 } else { 855 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 856 } 857 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 858 max_in_capsule_data_size = ctrlr->ioccsz_bytes; 859 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 860 max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 861 } 862 863 if (req->payload_size <= max_in_capsule_data_size) { 864 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 865 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 866 req->cmd.dptr.sgl1.address = 0; 867 tcp_req->in_capsule_data = true; 868 } 869 } 870 871 return 0; 872 } 873 874 static inline bool 875 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 876 { 877 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 878 return false; 879 } 880 881 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 882 assert(tcp_req->tqpair != NULL); 883 assert(tcp_req->req != NULL); 884 
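/* Both ordering bits are set: the socket layer has acked the capsule/H2C send and the response (or last C2H data) has arrived, so no PDU buffers are still in flight and the request can be completed and recycled. */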
885 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 886 887 if (!tcp_req->tqpair->qpair.in_completion_context) { 888 tcp_req->tqpair->async_complete++; 889 } 890 891 nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true); 892 return true; 893 } 894 895 static void 896 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 897 { 898 struct nvme_tcp_req *tcp_req = cb_arg; 899 900 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 901 tcp_req->tqpair->qpair.id); 902 tcp_req->ordering.bits.send_ack = 1; 903 /* Handle the r2t case */ 904 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 905 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 906 nvme_tcp_send_h2c_data(tcp_req); 907 } else { 908 nvme_tcp_req_complete_safe(tcp_req); 909 } 910 } 911 912 static int 913 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 914 struct nvme_tcp_req *tcp_req) 915 { 916 struct nvme_tcp_pdu *pdu; 917 struct spdk_nvme_tcp_cmd *capsule_cmd; 918 uint32_t plen = 0, alignment; 919 uint8_t pdo; 920 921 SPDK_DEBUGLOG(nvme, "enter\n"); 922 pdu = tcp_req->pdu; 923 pdu->req = tcp_req; 924 925 capsule_cmd = &pdu->hdr.capsule_cmd; 926 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 927 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 928 capsule_cmd->ccsqe = tcp_req->req->cmd; 929 930 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 931 932 if (tqpair->flags.host_hdgst_enable) { 933 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 934 tcp_req); 935 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 936 plen += SPDK_NVME_TCP_DIGEST_LEN; 937 } 938 939 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 940 goto end; 941 } 942 943 pdo = plen; 944 pdu->padding_len = 0; 945 if (tqpair->cpda) { 946 alignment = (tqpair->cpda + 1) << 2; 947 if (alignment > plen) { 948 pdu->padding_len = alignment - plen; 949 pdo = alignment; 950 plen = alignment; 951 } 952 } 953 954 capsule_cmd->common.pdo = pdo; 955 plen += tcp_req->req->payload_size; 956 if (tqpair->flags.host_ddgst_enable) { 957 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 958 plen += SPDK_NVME_TCP_DIGEST_LEN; 959 } 960 961 tcp_req->datao = 0; 962 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 963 0, tcp_req->req->payload_size); 964 end: 965 capsule_cmd->common.plen = plen; 966 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 967 968 } 969 970 static int 971 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 972 struct nvme_request *req) 973 { 974 struct nvme_tcp_qpair *tqpair; 975 struct nvme_tcp_req *tcp_req; 976 977 tqpair = nvme_tcp_qpair(qpair); 978 assert(tqpair != NULL); 979 assert(req != NULL); 980 981 tcp_req = nvme_tcp_req_get(tqpair); 982 if (!tcp_req) { 983 tqpair->stats->queued_requests++; 984 /* Inform the upper layer to try again later. 
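* Returning -EAGAIN lets the generic qpair layer queue the request (see the queued_req handling referenced in pdu_write_done()) and resubmit it once a tcp_req is freed.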
*/ 985 return -EAGAIN; 986 } 987 988 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 989 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 990 nvme_tcp_req_put(tqpair, tcp_req); 991 return -1; 992 } 993 994 spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg, 995 (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc, 996 req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12); 997 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 998 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 999 } 1000 1001 static int 1002 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 1003 { 1004 return 0; 1005 } 1006 1007 static void 1008 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 1009 struct nvme_tcp_qpair *tqpair, 1010 struct spdk_nvme_cpl *rsp, 1011 bool print_on_error) 1012 { 1013 struct spdk_nvme_cpl cpl; 1014 spdk_nvme_cmd_cb user_cb; 1015 void *user_cb_arg; 1016 struct spdk_nvme_qpair *qpair; 1017 struct nvme_request *req; 1018 bool error, print_error; 1019 1020 assert(tcp_req->req != NULL); 1021 req = tcp_req->req; 1022 1023 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 1024 memcpy(&cpl, rsp, sizeof(cpl)); 1025 user_cb = req->cb_fn; 1026 user_cb_arg = req->cb_arg; 1027 qpair = req->qpair; 1028 1029 error = spdk_nvme_cpl_is_error(rsp); 1030 print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 1031 1032 if (print_error) { 1033 spdk_nvme_qpair_print_command(qpair, &req->cmd); 1034 } 1035 1036 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 1037 spdk_nvme_qpair_print_completion(qpair, rsp); 1038 } 1039 1040 spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg, 1041 (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw); 1042 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 1043 nvme_tcp_req_put(tqpair, tcp_req); 1044 nvme_free_request(req); 1045 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 1046 } 1047 1048 static void 1049 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 1050 { 1051 struct nvme_tcp_req *tcp_req, *tmp; 1052 struct spdk_nvme_cpl cpl = {}; 1053 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1054 1055 cpl.sqid = qpair->id; 1056 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1057 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1058 cpl.status.dnr = dnr; 1059 1060 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1061 /* We cannot abort requests with accel operations in progress */ 1062 if (tcp_req->ordering.bits.in_progress_accel) { 1063 continue; 1064 } 1065 1066 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true); 1067 } 1068 } 1069 1070 static void 1071 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 1072 { 1073 struct nvme_tcp_qpair *tqpair = cb_arg; 1074 1075 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 1076 } 1077 1078 static void 1079 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1080 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 1081 { 1082 struct nvme_tcp_pdu *rsp_pdu; 1083 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 1084 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 1085 uint8_t copy_len; 1086 1087 rsp_pdu = tqpair->send_pdu; 1088 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1089 h2c_term_req = &rsp_pdu->hdr.term_req; 1090 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 1091 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 1092 1093 if ((fes == 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1094 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1095 DSET32(&h2c_term_req->fei, error_offset); 1096 } 1097 1098 copy_len = pdu->hdr.common.hlen; 1099 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 1100 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 1101 } 1102 1103 /* Copy the error info into the buffer */ 1104 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 1105 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 1106 1107 /* Contain the header len of the wrong received pdu */ 1108 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 1109 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1110 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair); 1111 } 1112 1113 static bool 1114 nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair) 1115 { 1116 switch (tqpair->state) { 1117 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 1118 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 1119 case NVME_TCP_QPAIR_STATE_RUNNING: 1120 return true; 1121 default: 1122 return false; 1123 } 1124 } 1125 1126 static void 1127 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 1128 { 1129 struct nvme_tcp_pdu *pdu; 1130 uint32_t error_offset = 0; 1131 enum spdk_nvme_tcp_term_req_fes fes; 1132 uint32_t expected_hlen, hd_len = 0; 1133 bool plen_error = false; 1134 1135 pdu = tqpair->recv_pdu; 1136 1137 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 1138 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 1139 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 1140 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 1141 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1142 goto err; 1143 } 1144 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 1145 if (pdu->hdr.common.plen != expected_hlen) { 1146 plen_error = true; 1147 } 1148 } else { 1149 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) { 1150 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n"); 1151 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 1152 goto err; 1153 } 1154 1155 switch (pdu->hdr.common.pdu_type) { 1156 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1157 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 1158 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 1159 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 1160 } 1161 1162 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 1163 plen_error = true; 1164 } 1165 break; 1166 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1167 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1168 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 1169 plen_error = true; 1170 } 1171 break; 1172 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1173 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 1174 if ((pdu->hdr.common.plen <= expected_hlen) || 1175 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 1176 plen_error = true; 1177 } 1178 break; 1179 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1180 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 1181 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 1182 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 1183 } 1184 1185 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 1186 plen_error = true; 1187 } 1188 break; 1189 1190 default: 1191 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", 
tqpair->recv_pdu->hdr.common.pdu_type); 1192 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1193 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 1194 goto err; 1195 } 1196 } 1197 1198 if (pdu->hdr.common.hlen != expected_hlen) { 1199 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 1200 expected_hlen, pdu->hdr.common.hlen); 1201 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1202 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 1203 goto err; 1204 1205 } else if (plen_error) { 1206 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1207 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 1208 goto err; 1209 } else { 1210 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1211 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 1212 return; 1213 } 1214 err: 1215 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1216 } 1217 1218 static struct nvme_tcp_req * 1219 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 1220 { 1221 assert(tqpair != NULL); 1222 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 1223 return NULL; 1224 } 1225 1226 return &tqpair->tcp_reqs[cid]; 1227 } 1228 1229 static void 1230 nvme_tcp_recv_payload_seq_cb(void *cb_arg, int status) 1231 { 1232 struct nvme_tcp_req *treq = cb_arg; 1233 struct nvme_request *req = treq->req; 1234 struct nvme_tcp_qpair *tqpair = treq->tqpair; 1235 struct nvme_tcp_poll_group *group; 1236 1237 assert(treq->ordering.bits.in_progress_accel); 1238 treq->ordering.bits.in_progress_accel = 0; 1239 1240 /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see 1241 * comment in pdu_write_done(). 
*/ 1242 if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) { 1243 group = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1244 TAILQ_INSERT_TAIL(&group->needs_poll, tqpair, link); 1245 tqpair->needs_poll = true; 1246 } 1247 1248 req->accel_sequence = NULL; 1249 if (spdk_unlikely(status != 0)) { 1250 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status); 1251 treq->rsp.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1252 } 1253 1254 nvme_tcp_req_complete_safe(treq); 1255 } 1256 1257 static void 1258 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 1259 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 1260 { 1261 struct nvme_tcp_req *tcp_req; 1262 struct nvme_tcp_poll_group *tgroup; 1263 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 1264 uint8_t flags; 1265 1266 tcp_req = pdu->req; 1267 assert(tcp_req != NULL); 1268 1269 SPDK_DEBUGLOG(nvme, "enter\n"); 1270 c2h_data = &pdu->hdr.c2h_data; 1271 tcp_req->datao += pdu->data_len; 1272 flags = c2h_data->common.flags; 1273 1274 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) { 1275 if (tcp_req->datao == tcp_req->req->payload_size) { 1276 tcp_req->rsp.status.p = 0; 1277 } else { 1278 tcp_req->rsp.status.p = 1; 1279 } 1280 1281 tcp_req->rsp.cid = tcp_req->cid; 1282 tcp_req->rsp.sqid = tqpair->qpair.id; 1283 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 1284 tcp_req->ordering.bits.data_recv = 1; 1285 if (tcp_req->req->accel_sequence != NULL) { 1286 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1287 nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence); 1288 nvme_tcp_accel_finish_sequence(tgroup, tcp_req, 1289 tcp_req->req->accel_sequence, 1290 nvme_tcp_recv_payload_seq_cb, 1291 tcp_req); 1292 return; 1293 } 1294 1295 if (nvme_tcp_req_complete_safe(tcp_req)) { 1296 (*reaped)++; 1297 } 1298 } 1299 } 1300 } 1301 1302 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 1303 "Invalid PDU Header Field", 1304 "PDU Sequence Error", 1305 "Header Digest Error", 1306 "Data Transfer Out of Range", 1307 "Data Transfer Limit Exceeded", 1308 "Unsupported parameter", 1309 }; 1310 1311 static void 1312 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 1313 { 1314 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 1315 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 1316 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1317 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1318 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 1319 DGET32(c2h_term_req->fei)); 1320 } 1321 /* we may also need to dump some other info here */ 1322 } 1323 1324 static void 1325 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 1326 struct nvme_tcp_pdu *pdu) 1327 { 1328 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 1329 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1330 } 1331 1332 static void 1333 _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1334 { 1335 struct nvme_tcp_pdu *pdu; 1336 1337 assert(tqpair != NULL); 1338 pdu = tqpair->recv_pdu; 1339 1340 switch (pdu->hdr.common.pdu_type) { 1341 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1342 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 1343 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1344 break; 1345 1346 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1347 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 
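/* The handler above dumps the term req error info and switches the receive state to QUIESCING, so no further PDUs are read on this qpair. */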
1348 break; 1349 1350 default: 1351 /* The code should not go to here */ 1352 SPDK_ERRLOG("The code should not go to here\n"); 1353 break; 1354 } 1355 } 1356 1357 static void 1358 nvme_tcp_accel_recv_compute_crc32_done(void *cb_arg, int status) 1359 { 1360 struct nvme_tcp_req *tcp_req = cb_arg; 1361 struct nvme_tcp_pdu *pdu; 1362 struct nvme_tcp_qpair *tqpair; 1363 int rc; 1364 struct nvme_tcp_poll_group *pgroup; 1365 int dummy_reaped = 0; 1366 1367 pdu = tcp_req->pdu; 1368 assert(pdu != NULL); 1369 1370 tqpair = tcp_req->tqpair; 1371 assert(tqpair != NULL); 1372 1373 assert(tcp_req->ordering.bits.in_progress_accel); 1374 tcp_req->ordering.bits.in_progress_accel = 0; 1375 1376 /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see 1377 * comment in pdu_write_done(). */ 1378 if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) { 1379 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1380 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 1381 tqpair->needs_poll = true; 1382 } 1383 1384 if (spdk_unlikely(status)) { 1385 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1386 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1387 goto end; 1388 } 1389 1390 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1391 rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1392 if (rc == 0) { 1393 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1394 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1395 } 1396 1397 end: 1398 nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped); 1399 } 1400 1401 static void 1402 nvme_tcp_req_copy_pdu(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu) 1403 { 1404 treq->pdu->hdr = pdu->hdr; 1405 treq->pdu->req = treq; 1406 memcpy(treq->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest)); 1407 memcpy(treq->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt); 1408 treq->pdu->data_iovcnt = pdu->data_iovcnt; 1409 treq->pdu->data_len = pdu->data_len; 1410 } 1411 1412 static void 1413 nvme_tcp_accel_seq_recv_compute_crc32_done(void *cb_arg) 1414 { 1415 struct nvme_tcp_req *treq = cb_arg; 1416 struct nvme_tcp_qpair *tqpair = treq->tqpair; 1417 struct nvme_tcp_pdu *pdu = treq->pdu; 1418 bool result; 1419 1420 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1421 result = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1422 if (spdk_unlikely(!result)) { 1423 SPDK_ERRLOG("data digest error on tqpair=(%p)\n", tqpair); 1424 treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1425 } 1426 } 1427 1428 static bool 1429 nvme_tcp_accel_recv_compute_crc32(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu) 1430 { 1431 struct nvme_tcp_qpair *tqpair = treq->tqpair; 1432 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1433 struct nvme_request *req = treq->req; 1434 int rc, dummy = 0; 1435 1436 /* Only support this limited case that the request has only one c2h pdu */ 1437 if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED || 1438 tqpair->qpair.poll_group == NULL || pdu->dif_ctx != NULL || 1439 pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0 || 1440 pdu->data_len != req->payload_size)) { 1441 return false; 1442 } 1443 1444 if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) { 1445 nvme_tcp_req_copy_pdu(treq, pdu); 1446 nvme_tcp_qpair_set_recv_state(tqpair, 
NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1447 rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence, 1448 &treq->pdu->data_digest_crc32, 1449 treq->pdu->data_iov, treq->pdu->data_iovcnt, 0, 1450 nvme_tcp_accel_seq_recv_compute_crc32_done, treq); 1451 if (spdk_unlikely(rc != 0)) { 1452 /* If accel is out of resources, fall back to non-accelerated crc32 */ 1453 if (rc == -ENOMEM) { 1454 return false; 1455 } 1456 1457 SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc); 1458 treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1459 } 1460 1461 nvme_tcp_c2h_data_payload_handle(tqpair, treq->pdu, &dummy); 1462 return true; 1463 } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) { 1464 nvme_tcp_req_copy_pdu(treq, pdu); 1465 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1466 nvme_tcp_accel_submit_crc32c(tgroup, treq, &treq->pdu->data_digest_crc32, 1467 treq->pdu->data_iov, treq->pdu->data_iovcnt, 0, 1468 nvme_tcp_accel_recv_compute_crc32_done, treq); 1469 return true; 1470 } 1471 1472 return false; 1473 } 1474 1475 static void 1476 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 1477 uint32_t *reaped) 1478 { 1479 int rc = 0; 1480 struct nvme_tcp_pdu *pdu = tqpair->recv_pdu; 1481 uint32_t crc32c; 1482 struct nvme_tcp_req *tcp_req = pdu->req; 1483 1484 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1485 SPDK_DEBUGLOG(nvme, "enter\n"); 1486 1487 /* The request can be NULL, e.g. in case of C2HTermReq */ 1488 if (spdk_likely(tcp_req != NULL)) { 1489 tcp_req->expected_datao += pdu->data_len; 1490 } 1491 1492 /* check data digest if need */ 1493 if (pdu->ddgst_enable) { 1494 /* But if the data digest is enabled, tcp_req cannot be NULL */ 1495 assert(tcp_req != NULL); 1496 if (nvme_tcp_accel_recv_compute_crc32(tcp_req, pdu)) { 1497 return; 1498 } 1499 1500 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1501 crc32c = crc32c ^ SPDK_CRC32C_XOR; 1502 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1503 if (rc == 0) { 1504 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1505 tcp_req = pdu->req; 1506 assert(tcp_req != NULL); 1507 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1508 } 1509 } 1510 1511 _nvme_tcp_pdu_payload_handle(tqpair, reaped); 1512 } 1513 1514 static void 1515 nvme_tcp_send_icreq_complete(void *cb_arg) 1516 { 1517 struct nvme_tcp_qpair *tqpair = cb_arg; 1518 1519 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id); 1520 1521 tqpair->flags.icreq_send_ack = true; 1522 1523 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 1524 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id); 1525 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1526 } 1527 } 1528 1529 static void 1530 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1531 struct nvme_tcp_pdu *pdu) 1532 { 1533 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1534 uint32_t error_offset = 0; 1535 enum spdk_nvme_tcp_term_req_fes fes; 1536 int recv_buf_size; 1537 1538 /* Only PFV 0 is defined currently */ 1539 if (ic_resp->pfv != 0) { 1540 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1541 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1542 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1543 goto end; 1544 } 1545 1546 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1547 SPDK_ERRLOG("Expected ICResp maxh2cdata 
>=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1548 ic_resp->maxh2cdata); 1549 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1550 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1551 goto end; 1552 } 1553 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1554 1555 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1556 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1557 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1558 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1559 goto end; 1560 } 1561 tqpair->cpda = ic_resp->cpda; 1562 1563 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1564 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false; 1565 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1566 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1567 1568 /* Now that we know whether digests are enabled, properly size the receive buffer to 1569 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1570 * parameter. */ 1571 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1572 1573 if (tqpair->flags.host_hdgst_enable) { 1574 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1575 } 1576 1577 if (tqpair->flags.host_ddgst_enable) { 1578 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1579 } 1580 1581 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1582 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1583 tqpair, 1584 recv_buf_size); 1585 /* Not fatal. */ 1586 } 1587 1588 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1589 1590 if (!tqpair->flags.icreq_send_ack) { 1591 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1592 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1593 return; 1594 } 1595 1596 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1597 return; 1598 end: 1599 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1600 } 1601 1602 static void 1603 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1604 uint32_t *reaped) 1605 { 1606 struct nvme_tcp_req *tcp_req; 1607 struct nvme_tcp_poll_group *tgroup; 1608 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1609 uint32_t cid, error_offset = 0; 1610 enum spdk_nvme_tcp_term_req_fes fes; 1611 1612 SPDK_DEBUGLOG(nvme, "enter\n"); 1613 cid = capsule_resp->rccqe.cid; 1614 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1615 1616 if (!tcp_req) { 1617 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1618 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1619 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1620 goto end; 1621 } 1622 1623 assert(tcp_req->req != NULL); 1624 1625 tcp_req->rsp = capsule_resp->rccqe; 1626 tcp_req->ordering.bits.data_recv = 1; 1627 1628 /* Recv the pdu again */ 1629 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1630 1631 if (tcp_req->req->accel_sequence != NULL) { 1632 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1633 nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence); 1634 nvme_tcp_accel_finish_sequence(tgroup, tcp_req, tcp_req->req->accel_sequence, 1635 nvme_tcp_recv_payload_seq_cb, tcp_req); 1636 return; 1637 } 1638 
    if (nvme_tcp_req_complete_safe(tcp_req)) {
        (*reaped)++;
    }

    return;

end:
    nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
                                 struct nvme_tcp_pdu *pdu)
{
    struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
    uint32_t error_offset = 0;
    enum spdk_nvme_tcp_term_req_fes fes;

    if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
        SPDK_ERRLOG("Fatal Error Status (FES) is unknown for c2h_term_req pdu=%p\n", pdu);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
        error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
        goto end;
    }

    /* Set the data buffer */
    nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
                          c2h_term_req->common.plen - c2h_term_req->common.hlen);
    nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
    return;
end:
    nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
    struct nvme_tcp_req *tcp_req;
    struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
    uint32_t error_offset = 0;
    enum spdk_nvme_tcp_term_req_fes fes;
    int flags = c2h_data->common.flags;

    SPDK_DEBUGLOG(nvme, "enter\n");
    SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
                  tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
    tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
    if (!tcp_req) {
        SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
        error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
        goto end;
    }

    SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n",
                  tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size);

    if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) &&
                      !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) {
        SPDK_ERRLOG("Invalid flags=%d in c2h_data=%p: SUCCESS set without LAST_PDU\n",
                    flags, c2h_data);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
        error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common);
        goto end;
    }

    if (c2h_data->datal > tcp_req->req->payload_size) {
        SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
                    tcp_req, c2h_data->datal, tcp_req->req->payload_size);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
        goto end;
    }

    if (tcp_req->expected_datao != c2h_data->datao) {
        SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datao(%u) != expected datao(%u)\n",
                    tcp_req, c2h_data->datao, tcp_req->expected_datao);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
        error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
        goto end;
    }

    if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
        SPDK_ERRLOG("Invalid data range for tcp_req(%p), (datao(%u) + datal(%u)) exceeds payload_size(%u)\n",
                    tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
        error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
        goto end;
    }

    nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
                              c2h_data->datao, c2h_data->datal);
    pdu->req = tcp_req;

    nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
    return;

end:
    nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
{
    struct nvme_tcp_req *tcp_req = cb_arg;

    assert(tcp_req != NULL);

    tcp_req->ordering.bits.send_ack = 1;
    if (tcp_req->r2tl_remain) {
        nvme_tcp_send_h2c_data(tcp_req);
    } else {
        assert(tcp_req->active_r2ts > 0);
        tcp_req->active_r2ts--;
        tcp_req->state = NVME_TCP_REQ_ACTIVE;

        if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) {
            tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0;
            SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req);
            assert(tcp_req->active_r2ts > 0);
            tcp_req->ttag = tcp_req->ttag_r2t_next;
            tcp_req->r2tl_remain = tcp_req->r2tl_remain_next;
            tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
            nvme_tcp_send_h2c_data(tcp_req);
            return;
        }

        /* We also need to call this function here to free the request if it is safe to do so. */
        nvme_tcp_req_complete_safe(tcp_req);
    }
}

static void
nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
{
    struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
    struct nvme_tcp_pdu *rsp_pdu;
    struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
    uint32_t plen, pdo, alignment;

    /* Reinitialize the send_ack and h2c_send_waiting_ack bits */
    tcp_req->ordering.bits.send_ack = 0;
    tcp_req->ordering.bits.h2c_send_waiting_ack = 0;
    rsp_pdu = tcp_req->pdu;
    memset(rsp_pdu, 0, sizeof(*rsp_pdu));
    rsp_pdu->req = tcp_req;
    h2c_data = &rsp_pdu->hdr.h2c_data;

    h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
    plen = h2c_data->common.hlen = sizeof(*h2c_data);
    h2c_data->cccid = tcp_req->cid;
    h2c_data->ttag = tcp_req->ttag;
    h2c_data->datao = tcp_req->datao;

    h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
    nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
                              h2c_data->datao, h2c_data->datal);
    tcp_req->r2tl_remain -= h2c_data->datal;

    if (tqpair->flags.host_hdgst_enable) {
        h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
        plen += SPDK_NVME_TCP_DIGEST_LEN;
    }

    rsp_pdu->padding_len = 0;
    pdo = plen;
    if (tqpair->cpda) {
        alignment = (tqpair->cpda + 1) << 2;
        if (alignment > plen) {
            rsp_pdu->padding_len = alignment - plen;
            pdo = plen = alignment;
        }
    }

    h2c_data->common.pdo = pdo;
    plen += h2c_data->datal;
    if (tqpair->flags.host_ddgst_enable) {
        h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
        plen += SPDK_NVME_TCP_DIGEST_LEN;
    }

    h2c_data->common.plen = plen;
    tcp_req->datao += h2c_data->datal;
    if (!tcp_req->r2tl_remain) {
        h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
    }

    SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
                  h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);

    nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
}

static void
nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
    struct nvme_tcp_req *tcp_req;
    struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
    uint32_t cid, error_offset = 0;
    enum spdk_nvme_tcp_term_req_fes fes;

    SPDK_DEBUGLOG(nvme, "enter\n");
    cid = r2t->cccid;
    tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
    if (!tcp_req) {
        SPDK_ERRLOG("Cannot find tcp_req with cid=%u for tqpair=%p\n", cid, tqpair);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
        error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
        goto end;
    }

    SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
                  tqpair);

    if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
        assert(tcp_req->active_r2ts == 0);
        tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
    }

    if (tcp_req->datao != r2t->r2to) {
        fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
        error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
        goto end;
    }

    if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
        SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
                    tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
        fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
        error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
        goto end;
    }

    tcp_req->active_r2ts++;
    if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
        if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
            /* We received a subsequent R2T while we are still waiting for the previous H2C transfer to complete */
            SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
            assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
            tcp_req->ttag_r2t_next = r2t->ttag;
            tcp_req->r2tl_remain_next = r2t->r2tl;
            tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
            nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
            return;
        } else {
            fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
            SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! 
Max: %u for tqpair=%p\n", tqpair->maxr2t, 1887 tqpair); 1888 goto end; 1889 } 1890 } 1891 1892 tcp_req->ttag = r2t->ttag; 1893 tcp_req->r2tl_remain = r2t->r2tl; 1894 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1895 1896 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1897 nvme_tcp_send_h2c_data(tcp_req); 1898 } else { 1899 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1900 } 1901 1902 return; 1903 1904 end: 1905 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1906 1907 } 1908 1909 static void 1910 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1911 { 1912 struct nvme_tcp_pdu *pdu; 1913 int rc; 1914 uint32_t crc32c, error_offset = 0; 1915 enum spdk_nvme_tcp_term_req_fes fes; 1916 1917 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1918 pdu = tqpair->recv_pdu; 1919 1920 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1921 /* check header digest if needed */ 1922 if (pdu->has_hdgst) { 1923 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1924 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1925 if (rc == 0) { 1926 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1927 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1928 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1929 return; 1930 1931 } 1932 } 1933 1934 switch (pdu->hdr.common.pdu_type) { 1935 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1936 nvme_tcp_icresp_handle(tqpair, pdu); 1937 break; 1938 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1939 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1940 break; 1941 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1942 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1943 break; 1944 1945 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1946 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1947 break; 1948 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1949 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1950 break; 1951 1952 default: 1953 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 1954 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1955 error_offset = 1; 1956 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1957 break; 1958 } 1959 1960 } 1961 1962 static int 1963 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions) 1964 { 1965 int rc = 0; 1966 struct nvme_tcp_pdu *pdu; 1967 uint32_t data_len; 1968 enum nvme_tcp_pdu_recv_state prev_state; 1969 1970 *reaped = tqpair->async_complete; 1971 tqpair->async_complete = 0; 1972 1973 /* The loop here is to allow for several back-to-back state changes. 
     */
    do {
        if (*reaped >= max_completions) {
            break;
        }

        prev_state = tqpair->recv_state;
        pdu = tqpair->recv_pdu;
        switch (tqpair->recv_state) {
        /* If in a new state */
        case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
            memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
            nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
            break;
        /* Wait for the pdu common header */
        case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
            assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
            rc = nvme_tcp_read_data(tqpair->sock,
                                    sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
                                    (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
            if (rc < 0) {
                nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
                break;
            }
            pdu->ch_valid_bytes += rc;
            if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
                return NVME_TCP_PDU_IN_PROGRESS;
            }

            /* The common header of this PDU has now been read from the socket. */
            nvme_tcp_pdu_ch_handle(tqpair);
            break;
        /* Wait for the pdu specific header */
        case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
            assert(pdu->psh_valid_bytes < pdu->psh_len);
            rc = nvme_tcp_read_data(tqpair->sock,
                                    pdu->psh_len - pdu->psh_valid_bytes,
                                    (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
            if (rc < 0) {
                nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
                break;
            }

            pdu->psh_valid_bytes += rc;
            if (pdu->psh_valid_bytes < pdu->psh_len) {
                return NVME_TCP_PDU_IN_PROGRESS;
            }

            /* The entire header (CH, PSH and header digest) of this PDU has now been read from the socket. */
            nvme_tcp_pdu_psh_handle(tqpair, reaped);
            break;
        case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
            /* Check whether the data is valid; if not, just return */
            if (!pdu->data_len) {
                return NVME_TCP_PDU_IN_PROGRESS;
            }

            data_len = pdu->data_len;
            /* data digest */
            if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
                              tqpair->flags.host_ddgst_enable)) {
                data_len += SPDK_NVME_TCP_DIGEST_LEN;
                pdu->ddgst_enable = true;
            }

            rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
            if (rc < 0) {
                nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
                break;
            }

            pdu->rw_offset += rc;
            if (pdu->rw_offset < data_len) {
                return NVME_TCP_PDU_IN_PROGRESS;
            }

            assert(pdu->rw_offset == data_len);
            /* All of this PDU has now been read from the socket. */
            nvme_tcp_pdu_payload_handle(tqpair, reaped);
            break;
        case NVME_TCP_PDU_RECV_STATE_QUIESCING:
            if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
                if (nvme_qpair_get_state(&tqpair->qpair) == NVME_QPAIR_DISCONNECTING) {
                    nvme_transport_ctrlr_disconnect_qpair_done(&tqpair->qpair);
                }

                nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
            }
            break;
        case NVME_TCP_PDU_RECV_STATE_ERROR:
            memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
            return NVME_TCP_PDU_FATAL;
        default:
            assert(0);
            break;
        }
    } while (prev_state != tqpair->recv_state);

    return rc > 0 ? 
0 : rc; 2072 } 2073 2074 static void 2075 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 2076 { 2077 uint64_t t02; 2078 struct nvme_tcp_req *tcp_req, *tmp; 2079 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2080 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 2081 struct spdk_nvme_ctrlr_process *active_proc; 2082 2083 /* Don't check timeouts during controller initialization. */ 2084 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 2085 return; 2086 } 2087 2088 if (nvme_qpair_is_admin_queue(qpair)) { 2089 active_proc = nvme_ctrlr_get_current_process(ctrlr); 2090 } else { 2091 active_proc = qpair->active_proc; 2092 } 2093 2094 /* Only check timeouts if the current process has a timeout callback. */ 2095 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 2096 return; 2097 } 2098 2099 t02 = spdk_get_ticks(); 2100 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2101 if (ctrlr->is_failed) { 2102 /* The controller state may be changed to failed in one of the nvme_request_check_timeout callbacks. */ 2103 return; 2104 } 2105 assert(tcp_req->req != NULL); 2106 2107 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 2108 /* 2109 * The requests are in order, so as soon as one has not timed out, 2110 * stop iterating. 2111 */ 2112 break; 2113 } 2114 } 2115 } 2116 2117 static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, 2118 struct spdk_nvme_qpair *qpair); 2119 2120 static int 2121 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 2122 { 2123 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2124 uint32_t reaped; 2125 int rc; 2126 2127 if (qpair->poll_group == NULL) { 2128 rc = spdk_sock_flush(tqpair->sock); 2129 if (rc < 0 && errno != EAGAIN) { 2130 SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair, 2131 errno, spdk_strerror(errno)); 2132 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 2133 nvme_tcp_qpair_check_timeout(qpair); 2134 } 2135 2136 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) { 2137 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) { 2138 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 2139 } 2140 2141 /* Don't return errors until the qpair gets disconnected */ 2142 return 0; 2143 } 2144 2145 goto fail; 2146 } 2147 } 2148 2149 if (max_completions == 0) { 2150 max_completions = spdk_max(tqpair->num_entries, 1); 2151 } else { 2152 max_completions = spdk_min(max_completions, tqpair->num_entries); 2153 } 2154 2155 reaped = 0; 2156 rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions); 2157 if (rc < 0) { 2158 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 2159 errno, spdk_strerror(errno)); 2160 goto fail; 2161 } 2162 2163 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 2164 nvme_tcp_qpair_check_timeout(qpair); 2165 } 2166 2167 if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 2168 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair); 2169 if (rc != 0 && rc != -EAGAIN) { 2170 SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair); 2171 goto fail; 2172 } else if (rc == 0) { 2173 /* Once the connection is completed, we can submit queued requests */ 2174 nvme_qpair_resubmit_requests(qpair, tqpair->num_entries); 2175 } 2176 } 2177 2178 return reaped; 2179 fail: 2180 2181 /* 2182 * Since admin queues take the ctrlr_lock before entering this function, 2183 * we can call nvme_transport_ctrlr_disconnect_qpair. 
For other qpairs we need 2184 * to call the generic function which will take the lock for us. 2185 */ 2186 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN; 2187 2188 if (nvme_qpair_is_admin_queue(qpair)) { 2189 nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair); 2190 } else { 2191 nvme_ctrlr_disconnect_qpair(qpair); 2192 } 2193 return -ENXIO; 2194 } 2195 2196 static void 2197 nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock) 2198 { 2199 struct spdk_nvme_qpair *qpair = ctx; 2200 struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group); 2201 int32_t num_completions; 2202 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2203 2204 if (tqpair->needs_poll) { 2205 TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link); 2206 tqpair->needs_poll = false; 2207 } 2208 2209 num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair); 2210 2211 if (pgroup->num_completions >= 0 && num_completions >= 0) { 2212 pgroup->num_completions += num_completions; 2213 pgroup->stats.nvme_completions += num_completions; 2214 } else { 2215 pgroup->num_completions = -ENXIO; 2216 } 2217 } 2218 2219 static int 2220 nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair) 2221 { 2222 struct spdk_nvme_tcp_ic_req *ic_req; 2223 struct nvme_tcp_pdu *pdu; 2224 uint32_t timeout_in_sec; 2225 2226 pdu = tqpair->send_pdu; 2227 memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu)); 2228 ic_req = &pdu->hdr.ic_req; 2229 2230 ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ; 2231 ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req); 2232 ic_req->pfv = 0; 2233 ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1; 2234 ic_req->hpda = NVME_TCP_HPDA_DEFAULT; 2235 2236 ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest; 2237 ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest; 2238 2239 nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair); 2240 2241 timeout_in_sec = tqpair->qpair.async ? 
ICREQ_TIMEOUT_ASYNC : ICREQ_TIMEOUT_SYNC; 2242 tqpair->icreq_timeout_tsc = spdk_get_ticks() + (timeout_in_sec * spdk_get_ticks_hz()); 2243 return 0; 2244 } 2245 2246 static int 2247 nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2248 { 2249 struct sockaddr_storage dst_addr; 2250 struct sockaddr_storage src_addr; 2251 int rc; 2252 struct nvme_tcp_qpair *tqpair; 2253 int family; 2254 long int port; 2255 char *sock_impl_name; 2256 struct spdk_sock_impl_opts impl_opts = {}; 2257 size_t impl_opts_size = sizeof(impl_opts); 2258 struct spdk_sock_opts opts; 2259 struct nvme_tcp_ctrlr *tcp_ctrlr; 2260 2261 tqpair = nvme_tcp_qpair(qpair); 2262 2263 switch (ctrlr->trid.adrfam) { 2264 case SPDK_NVMF_ADRFAM_IPV4: 2265 family = AF_INET; 2266 break; 2267 case SPDK_NVMF_ADRFAM_IPV6: 2268 family = AF_INET6; 2269 break; 2270 default: 2271 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam); 2272 rc = -1; 2273 return rc; 2274 } 2275 2276 SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family); 2277 2278 memset(&dst_addr, 0, sizeof(dst_addr)); 2279 2280 port = spdk_strtol(ctrlr->trid.trsvcid, 10); 2281 if (port <= 0 || port >= INT_MAX) { 2282 SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid); 2283 rc = -1; 2284 return rc; 2285 } 2286 2287 SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid); 2288 rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid); 2289 if (rc != 0) { 2290 SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n"); 2291 return rc; 2292 } 2293 2294 if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) { 2295 memset(&src_addr, 0, sizeof(src_addr)); 2296 rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid); 2297 if (rc != 0) { 2298 SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n"); 2299 return rc; 2300 } 2301 } 2302 2303 tcp_ctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 2304 sock_impl_name = tcp_ctrlr->psk[0] ? 
"ssl" : NULL; 2305 SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name); 2306 2307 if (sock_impl_name) { 2308 spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size); 2309 impl_opts.tls_version = SPDK_TLS_VERSION_1_3; 2310 impl_opts.psk_identity = tcp_ctrlr->psk_identity; 2311 impl_opts.psk_key = tcp_ctrlr->psk; 2312 impl_opts.psk_key_size = tcp_ctrlr->psk_size; 2313 impl_opts.tls_cipher_suites = tcp_ctrlr->tls_cipher_suite; 2314 } 2315 opts.opts_size = sizeof(opts); 2316 spdk_sock_get_default_opts(&opts); 2317 opts.priority = ctrlr->trid.priority; 2318 opts.zcopy = !nvme_qpair_is_admin_queue(qpair); 2319 if (ctrlr->opts.transport_ack_timeout) { 2320 opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout; 2321 } 2322 if (sock_impl_name) { 2323 opts.impl_opts = &impl_opts; 2324 opts.impl_opts_size = sizeof(impl_opts); 2325 } 2326 tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts); 2327 if (!tqpair->sock) { 2328 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", 2329 tqpair, ctrlr->trid.traddr, port); 2330 rc = -1; 2331 return rc; 2332 } 2333 2334 return 0; 2335 } 2336 2337 static int 2338 nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2339 { 2340 struct nvme_tcp_qpair *tqpair; 2341 int rc; 2342 2343 tqpair = nvme_tcp_qpair(qpair); 2344 2345 /* Prevent this function from being called recursively, as it could lead to issues with 2346 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive 2347 * call. 2348 */ 2349 if (tqpair->flags.in_connect_poll) { 2350 return -EAGAIN; 2351 } 2352 2353 tqpair->flags.in_connect_poll = 1; 2354 2355 switch (tqpair->state) { 2356 case NVME_TCP_QPAIR_STATE_INVALID: 2357 case NVME_TCP_QPAIR_STATE_INITIALIZING: 2358 if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) { 2359 SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair); 2360 rc = -ETIMEDOUT; 2361 break; 2362 } 2363 rc = -EAGAIN; 2364 break; 2365 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 2366 rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1); 2367 if (rc < 0) { 2368 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); 2369 break; 2370 } 2371 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL; 2372 rc = -EAGAIN; 2373 break; 2374 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 2375 rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair); 2376 if (rc == 0) { 2377 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 2378 nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); 2379 } else if (rc != -EAGAIN) { 2380 SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n"); 2381 } 2382 break; 2383 case NVME_TCP_QPAIR_STATE_RUNNING: 2384 rc = 0; 2385 break; 2386 default: 2387 assert(false); 2388 rc = -EINVAL; 2389 break; 2390 } 2391 2392 tqpair->flags.in_connect_poll = 0; 2393 return rc; 2394 } 2395 2396 static int 2397 nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2398 { 2399 int rc = 0; 2400 struct nvme_tcp_qpair *tqpair; 2401 struct nvme_tcp_poll_group *tgroup; 2402 2403 tqpair = nvme_tcp_qpair(qpair); 2404 2405 if (!tqpair->sock) { 2406 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair); 2407 if (rc < 0) { 2408 return rc; 2409 } 2410 } 2411 2412 if (qpair->poll_group) { 2413 rc = nvme_poll_group_connect_qpair(qpair); 2414 if (rc) { 2415 SPDK_ERRLOG("Unable to activate the tcp qpair.\n"); 2416 return rc; 2417 } 2418 tgroup = 
        nvme_tcp_poll_group(qpair->poll_group);
        tqpair->stats = &tgroup->stats;
        tqpair->shared_stats = true;
    } else {
        /* When resetting a controller, we disconnect the adminq and then reconnect. The stats
         * are not freed when disconnecting, so don't allocate them again when reconnecting.
         */
        if (tqpair->stats == NULL) {
            tqpair->stats = calloc(1, sizeof(*tqpair->stats));
            if (!tqpair->stats) {
                SPDK_ERRLOG("tcp stats memory allocation failed\n");
                return -ENOMEM;
            }
        }
    }

    tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
    /* Explicitly set the state and recv_state of tqpair */
    tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
    if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
        nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    }
    rc = nvme_tcp_qpair_icreq_send(tqpair);
    if (rc != 0) {
        SPDK_ERRLOG("Unable to connect the tqpair\n");
        return rc;
    }

    return rc;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
                            uint16_t qid, uint32_t qsize,
                            enum spdk_nvme_qprio qprio,
                            uint32_t num_requests, bool async)
{
    struct nvme_tcp_qpair *tqpair;
    struct spdk_nvme_qpair *qpair;
    int rc;

    if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
        SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
                    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
        return NULL;
    }

    tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
    if (!tqpair) {
        SPDK_ERRLOG("failed to allocate tqpair\n");
        return NULL;
    }

    /* Set num_entries to one less than the queue size. According to the NVMe
     * and NVMe-oF specs, we cannot submit queue-size requests; one slot must
     * always remain empty.
     */
    tqpair->num_entries = qsize - 1;
    qpair = &tqpair->qpair;
    rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
    if (rc != 0) {
        free(tqpair);
        return NULL;
    }

    rc = nvme_tcp_alloc_reqs(tqpair);
    if (rc) {
        nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
        return NULL;
    }

    /* spdk_nvme_qpair_get_optimal_poll_group needs socket information,
     * so create the socket first when creating a qpair. */
    rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
    if (rc) {
        nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
        return NULL;
    }

    return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
                               const struct spdk_nvme_io_qpair_opts *opts)
{
    return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
                                       opts->io_queue_requests, opts->async_mode);
}

/* We have to use the typedef in the function declaration to appease astyle. */
typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;

static int
nvme_tcp_generate_tls_credentials(struct nvme_tcp_ctrlr *tctrlr)
{
    int rc;
    uint8_t psk_retained[SPDK_TLS_PSK_MAX_LEN] = {};
    uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
    uint8_t tls_cipher_suite;
    uint8_t psk_retained_hash;
    uint64_t psk_configured_size;

    assert(tctrlr != NULL);

    rc = nvme_tcp_parse_interchange_psk(tctrlr->ctrlr.opts.psk, psk_configured, sizeof(psk_configured),
                                        &psk_configured_size, &psk_retained_hash);
    if (rc < 0) {
        SPDK_ERRLOG("Failed to parse PSK interchange!\n");
        goto finish;
    }

    /* The configured PSK is a Base64-encoded 32- or 48-byte value. This check also
     * ensures that psk_configured_size is smaller than the psk_retained buffer size. */
    if (psk_configured_size == SHA256_DIGEST_LENGTH) {
        tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
        tctrlr->tls_cipher_suite = "TLS_AES_128_GCM_SHA256";
    } else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
        tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
        tctrlr->tls_cipher_suite = "TLS_AES_256_GCM_SHA384";
    } else {
        SPDK_ERRLOG("Unsupported configured PSK length, cannot determine cipher suite\n");
        rc = -ENOTSUP;
        goto finish;
    }

    rc = nvme_tcp_generate_psk_identity(tctrlr->psk_identity, sizeof(tctrlr->psk_identity),
                                        tctrlr->ctrlr.opts.hostnqn, tctrlr->ctrlr.trid.subnqn,
                                        tls_cipher_suite);
    if (rc) {
        SPDK_ERRLOG("could not generate PSK identity\n");
        goto finish;
    }

    /* No hash indicates that Configured PSK must be used as Retained PSK. */
    if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
        assert(psk_configured_size < sizeof(psk_retained));
        memcpy(psk_retained, psk_configured, psk_configured_size);
        rc = psk_configured_size;
    } else {
        /* Derive retained PSK. */
        rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, tctrlr->ctrlr.opts.hostnqn,
                                          psk_retained, sizeof(psk_retained), psk_retained_hash);
        if (rc < 0) {
            SPDK_ERRLOG("Unable to derive retained PSK!\n");
            goto finish;
        }
    }

    rc = nvme_tcp_derive_tls_psk(psk_retained, rc, tctrlr->psk_identity, tctrlr->psk,
                                 sizeof(tctrlr->psk), tls_cipher_suite);
    if (rc < 0) {
        SPDK_ERRLOG("Could not generate TLS PSK!\n");
        /* Make sure the key material on the stack is cleared on this error path as well. */
        goto finish;
    }

    tctrlr->psk_size = rc;
    rc = 0;

finish:
    /* Zero out sensitive key material from the stack before returning. */
    spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured));
    spdk_memset_s(psk_retained, sizeof(psk_retained), 0, sizeof(psk_retained));

    return rc;
}

static spdk_nvme_ctrlr_t *
nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
                         const struct spdk_nvme_ctrlr_opts *opts,
                         void *devhandle)
{
    struct nvme_tcp_ctrlr *tctrlr;
    int rc;

    tctrlr = calloc(1, sizeof(*tctrlr));
    if (tctrlr == NULL) {
        SPDK_ERRLOG("could not allocate ctrlr\n");
        return NULL;
    }

    tctrlr->ctrlr.opts = *opts;
    tctrlr->ctrlr.trid = *trid;

    if (opts->psk[0] != '\0') {
        rc = nvme_tcp_generate_tls_credentials(tctrlr);
        spdk_memset_s(&tctrlr->ctrlr.opts.psk, sizeof(tctrlr->ctrlr.opts.psk), 0,
                      sizeof(tctrlr->ctrlr.opts.psk));

        if (rc != 0) {
            free(tctrlr);
            return NULL;
        }
    }

    if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
        SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
                       NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
        tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
    }

    rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
    if (rc != 0) {
        free(tctrlr);
        return NULL;
    }

    /* Only advertise support for accel sequences if data digest is enabled; otherwise
     * there is no benefit to finishing the sequences here. */
    if (opts->data_digest) {
        tctrlr->ctrlr.flags |= SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    }

    tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
                           tctrlr->ctrlr.opts.admin_queue_size, 0,
                           tctrlr->ctrlr.opts.admin_queue_size, true);
    if (!tctrlr->ctrlr.adminq) {
        SPDK_ERRLOG("failed to create admin qpair\n");
        nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
        return NULL;
    }

    if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
        SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
        nvme_ctrlr_destruct(&tctrlr->ctrlr);
        return NULL;
    }

    return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
    /* TCP transport doesn't limit maximum IO transfer size. 
*/ 2653 return UINT32_MAX; 2654 } 2655 2656 static uint16_t 2657 nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) 2658 { 2659 return NVME_TCP_MAX_SGL_DESCRIPTORS; 2660 } 2661 2662 static int 2663 nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 2664 int (*iter_fn)(struct nvme_request *req, void *arg), 2665 void *arg) 2666 { 2667 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2668 struct nvme_tcp_req *tcp_req, *tmp; 2669 int rc; 2670 2671 assert(iter_fn != NULL); 2672 2673 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2674 assert(tcp_req->req != NULL); 2675 2676 rc = iter_fn(tcp_req->req, arg); 2677 if (rc != 0) { 2678 return rc; 2679 } 2680 } 2681 2682 return 0; 2683 } 2684 2685 static void 2686 nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 2687 { 2688 struct nvme_tcp_req *tcp_req, *tmp; 2689 struct spdk_nvme_cpl cpl = {}; 2690 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2691 2692 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 2693 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2694 2695 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2696 assert(tcp_req->req != NULL); 2697 if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 2698 continue; 2699 } 2700 2701 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false); 2702 } 2703 } 2704 2705 static struct spdk_nvme_transport_poll_group * 2706 nvme_tcp_poll_group_create(void) 2707 { 2708 struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group)); 2709 2710 if (group == NULL) { 2711 SPDK_ERRLOG("Unable to allocate poll group.\n"); 2712 return NULL; 2713 } 2714 2715 TAILQ_INIT(&group->needs_poll); 2716 2717 group->sock_group = spdk_sock_group_create(group); 2718 if (group->sock_group == NULL) { 2719 free(group); 2720 SPDK_ERRLOG("Unable to allocate sock group.\n"); 2721 return NULL; 2722 } 2723 2724 return &group->group; 2725 } 2726 2727 static struct spdk_nvme_transport_poll_group * 2728 nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair) 2729 { 2730 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2731 struct spdk_sock_group *group = NULL; 2732 int rc; 2733 2734 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL); 2735 if (!rc && group != NULL) { 2736 return spdk_sock_group_get_ctx(group); 2737 } 2738 2739 return NULL; 2740 } 2741 2742 static int 2743 nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 2744 { 2745 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2746 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2747 2748 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2749 return -EPROTO; 2750 } 2751 return 0; 2752 } 2753 2754 static int 2755 nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) 2756 { 2757 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2758 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2759 2760 if (tqpair->needs_poll) { 2761 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2762 tqpair->needs_poll = false; 2763 } 2764 2765 if (tqpair->sock && group->sock_group) { 2766 if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) { 2767 return -EPROTO; 2768 } 2769 } 2770 return 0; 2771 } 2772 2773 static int 2774 nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, 2775 struct spdk_nvme_qpair *qpair) 2776 { 2777 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2778 struct nvme_tcp_poll_group *group = 
nvme_tcp_poll_group(tgroup); 2779 2780 /* disconnected qpairs won't have a sock to add. */ 2781 if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) { 2782 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2783 return -EPROTO; 2784 } 2785 } 2786 2787 return 0; 2788 } 2789 2790 static int 2791 nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, 2792 struct spdk_nvme_qpair *qpair) 2793 { 2794 struct nvme_tcp_qpair *tqpair; 2795 struct nvme_tcp_poll_group *group; 2796 2797 assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs); 2798 2799 tqpair = nvme_tcp_qpair(qpair); 2800 group = nvme_tcp_poll_group(tgroup); 2801 2802 assert(tqpair->shared_stats == true); 2803 tqpair->stats = &g_dummy_stats; 2804 2805 if (tqpair->needs_poll) { 2806 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2807 tqpair->needs_poll = false; 2808 } 2809 2810 return 0; 2811 } 2812 2813 static int64_t 2814 nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, 2815 uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) 2816 { 2817 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2818 struct spdk_nvme_qpair *qpair, *tmp_qpair; 2819 struct nvme_tcp_qpair *tqpair, *tmp_tqpair; 2820 int num_events; 2821 2822 group->completions_per_qpair = completions_per_qpair; 2823 group->num_completions = 0; 2824 group->stats.polls++; 2825 2826 num_events = spdk_sock_group_poll(group->sock_group); 2827 2828 STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { 2829 tqpair = nvme_tcp_qpair(qpair); 2830 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) { 2831 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) { 2832 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 2833 } 2834 } 2835 /* Wait until the qpair transitions to the DISCONNECTED state, otherwise user might 2836 * want to free it from disconnect_qpair_cb, while it's not fully disconnected (and 2837 * might still have outstanding requests) */ 2838 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED) { 2839 disconnected_qpair_cb(qpair, tgroup->group->ctx); 2840 } 2841 } 2842 2843 /* If any qpairs were marked as needing to be polled due to an asynchronous write completion 2844 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. 
*/ 2845 TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) { 2846 nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock); 2847 } 2848 2849 if (spdk_unlikely(num_events < 0)) { 2850 return num_events; 2851 } 2852 2853 group->stats.idle_polls += !num_events; 2854 group->stats.socket_completions += num_events; 2855 2856 return group->num_completions; 2857 } 2858 2859 static int 2860 nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) 2861 { 2862 int rc; 2863 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2864 2865 if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { 2866 return -EBUSY; 2867 } 2868 2869 rc = spdk_sock_group_close(&group->sock_group); 2870 if (rc != 0) { 2871 SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n"); 2872 assert(false); 2873 } 2874 2875 free(tgroup); 2876 2877 return 0; 2878 } 2879 2880 static int 2881 nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup, 2882 struct spdk_nvme_transport_poll_group_stat **_stats) 2883 { 2884 struct nvme_tcp_poll_group *group; 2885 struct spdk_nvme_transport_poll_group_stat *stats; 2886 2887 if (tgroup == NULL || _stats == NULL) { 2888 SPDK_ERRLOG("Invalid stats or group pointer\n"); 2889 return -EINVAL; 2890 } 2891 2892 group = nvme_tcp_poll_group(tgroup); 2893 2894 stats = calloc(1, sizeof(*stats)); 2895 if (!stats) { 2896 SPDK_ERRLOG("Can't allocate memory for TCP stats\n"); 2897 return -ENOMEM; 2898 } 2899 stats->trtype = SPDK_NVME_TRANSPORT_TCP; 2900 memcpy(&stats->tcp, &group->stats, sizeof(group->stats)); 2901 2902 *_stats = stats; 2903 2904 return 0; 2905 } 2906 2907 static void 2908 nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup, 2909 struct spdk_nvme_transport_poll_group_stat *stats) 2910 { 2911 free(stats); 2912 } 2913 2914 const struct spdk_nvme_transport_ops tcp_ops = { 2915 .name = "TCP", 2916 .type = SPDK_NVME_TRANSPORT_TCP, 2917 .ctrlr_construct = nvme_tcp_ctrlr_construct, 2918 .ctrlr_scan = nvme_fabric_ctrlr_scan, 2919 .ctrlr_destruct = nvme_tcp_ctrlr_destruct, 2920 .ctrlr_enable = nvme_tcp_ctrlr_enable, 2921 2922 .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4, 2923 .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8, 2924 .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4, 2925 .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8, 2926 .ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async, 2927 .ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async, 2928 .ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async, 2929 .ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async, 2930 2931 .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size, 2932 .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges, 2933 2934 .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair, 2935 .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair, 2936 .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair, 2937 .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair, 2938 2939 .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs, 2940 .qpair_reset = nvme_tcp_qpair_reset, 2941 .qpair_submit_request = nvme_tcp_qpair_submit_request, 2942 .qpair_process_completions = nvme_tcp_qpair_process_completions, 2943 .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests, 2944 .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers, 2945 2946 .poll_group_create = nvme_tcp_poll_group_create, 2947 .qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group, 2948 
.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair, 2949 .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair, 2950 .poll_group_add = nvme_tcp_poll_group_add, 2951 .poll_group_remove = nvme_tcp_poll_group_remove, 2952 .poll_group_process_completions = nvme_tcp_poll_group_process_completions, 2953 .poll_group_destroy = nvme_tcp_poll_group_destroy, 2954 .poll_group_get_stats = nvme_tcp_poll_group_get_stats, 2955 .poll_group_free_stats = nvme_tcp_poll_group_free_stats, 2956 }; 2957 2958 SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops); 2959 2960 SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP) 2961 { 2962 struct spdk_trace_tpoint_opts opts[] = { 2963 { 2964 "NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT, 2965 OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1, 2966 { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }, 2967 { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 }, 2968 { "opc", SPDK_TRACE_ARG_TYPE_INT, 4 }, 2969 { "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 2970 { "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 2971 { "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 } 2972 } 2973 }, 2974 { 2975 "NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE, 2976 OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0, 2977 { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }, 2978 { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 }, 2979 { "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 } 2980 } 2981 }, 2982 }; 2983 2984 spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p'); 2985 spdk_trace_register_owner(OWNER_NVME_TCP_QP, 'q'); 2986 spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts)); 2987 } 2988