1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. All rights reserved. 3 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 /* 8 * NVMe/TCP transport 9 */ 10 11 #include "nvme_internal.h" 12 13 #include "spdk/endian.h" 14 #include "spdk/likely.h" 15 #include "spdk/string.h" 16 #include "spdk/stdinc.h" 17 #include "spdk/crc32.h" 18 #include "spdk/endian.h" 19 #include "spdk/assert.h" 20 #include "spdk/string.h" 21 #include "spdk/trace.h" 22 #include "spdk/util.h" 23 #include "spdk/nvmf.h" 24 25 #include "spdk_internal/nvme_tcp.h" 26 #include "spdk_internal/trace_defs.h" 27 28 #define NVME_TCP_RW_BUFFER_SIZE 131072 29 30 /* For async connect workloads, allow more time since we are more likely 31 * to be processing lots ICREQs at once. 32 */ 33 #define ICREQ_TIMEOUT_SYNC 2 /* in seconds */ 34 #define ICREQ_TIMEOUT_ASYNC 10 /* in seconds */ 35 36 #define NVME_TCP_HPDA_DEFAULT 0 37 #define NVME_TCP_MAX_R2T_DEFAULT 1 38 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 39 40 /* 41 * Maximum value of transport_ack_timeout used by TCP controller 42 */ 43 #define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31 44 45 46 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 47 struct nvme_tcp_ctrlr { 48 struct spdk_nvme_ctrlr ctrlr; 49 char psk_identity[NVMF_PSK_IDENTITY_LEN]; 50 uint8_t psk[SPDK_TLS_PSK_MAX_LEN]; 51 int psk_size; 52 }; 53 54 struct nvme_tcp_poll_group { 55 struct spdk_nvme_transport_poll_group group; 56 struct spdk_sock_group *sock_group; 57 uint32_t completions_per_qpair; 58 int64_t num_completions; 59 60 TAILQ_HEAD(, nvme_tcp_qpair) needs_poll; 61 struct spdk_nvme_tcp_stat stats; 62 }; 63 64 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 65 struct nvme_tcp_qpair { 66 struct spdk_nvme_qpair qpair; 67 struct spdk_sock *sock; 68 69 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 70 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 71 72 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 73 struct nvme_tcp_pdu *recv_pdu; 74 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 75 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 76 enum nvme_tcp_pdu_recv_state recv_state; 77 struct nvme_tcp_req *tcp_reqs; 78 struct spdk_nvme_tcp_stat *stats; 79 80 uint16_t num_entries; 81 uint16_t async_complete; 82 83 struct { 84 uint16_t host_hdgst_enable: 1; 85 uint16_t host_ddgst_enable: 1; 86 uint16_t icreq_send_ack: 1; 87 uint16_t in_connect_poll: 1; 88 uint16_t reserved: 12; 89 } flags; 90 91 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 92 uint32_t maxh2cdata; 93 94 uint32_t maxr2t; 95 96 /* 0 based value, which is used to guide the padding */ 97 uint8_t cpda; 98 99 enum nvme_tcp_qpair_state state; 100 101 TAILQ_ENTRY(nvme_tcp_qpair) link; 102 bool needs_poll; 103 104 uint64_t icreq_timeout_tsc; 105 106 bool shared_stats; 107 }; 108 109 enum nvme_tcp_req_state { 110 NVME_TCP_REQ_FREE, 111 NVME_TCP_REQ_ACTIVE, 112 NVME_TCP_REQ_ACTIVE_R2T, 113 }; 114 115 struct nvme_tcp_req { 116 struct nvme_request *req; 117 enum nvme_tcp_req_state state; 118 uint16_t cid; 119 uint16_t ttag; 120 uint32_t datao; 121 uint32_t expected_datao; 122 uint32_t r2tl_remain; 123 uint32_t active_r2ts; 124 /* Used to hold a value received from subsequent R2T while we are still 125 * waiting for H2C complete */ 126 uint16_t ttag_r2t_next; 127 bool in_capsule_data; 128 /* It is used to track whether the req can be safely freed */ 129 union { 130 uint8_t raw; 
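/* Writing ordering.raw = 0 clears all of the flag bits below in a single store; nvme_tcp_req_get() relies on this when recycling a request. */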
131 struct { 132 /* The last send operation completed - kernel released send buffer */ 133 uint8_t send_ack : 1; 134 /* Data transfer completed - target send resp or last data bit */ 135 uint8_t data_recv : 1; 136 /* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification 137 * from kernel) to send H2C */ 138 uint8_t h2c_send_waiting_ack : 1; 139 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 140 * Rare case, actual when dealing with target that can send several R2T requests. 141 * SPDK TCP target sends 1 R2T for the whole data buffer */ 142 uint8_t r2t_waiting_h2c_complete : 1; 143 uint8_t reserved : 4; 144 } bits; 145 } ordering; 146 struct nvme_tcp_pdu *pdu; 147 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 148 uint32_t iovcnt; 149 /* Used to hold a value received from subsequent R2T while we are still 150 * waiting for H2C ack */ 151 uint32_t r2tl_remain_next; 152 struct nvme_tcp_qpair *tqpair; 153 TAILQ_ENTRY(nvme_tcp_req) link; 154 struct spdk_nvme_cpl rsp; 155 }; 156 157 static struct spdk_nvme_tcp_stat g_dummy_stats = {}; 158 159 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 160 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 161 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 162 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 163 static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair, 164 struct spdk_nvme_cpl *rsp, bool print_on_error); 165 166 static inline struct nvme_tcp_qpair * 167 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 168 { 169 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 170 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 171 } 172 173 static inline struct nvme_tcp_poll_group * 174 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 175 { 176 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 177 } 178 179 static inline struct nvme_tcp_ctrlr * 180 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 181 { 182 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 183 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 184 } 185 186 static struct nvme_tcp_req * 187 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 188 { 189 struct nvme_tcp_req *tcp_req; 190 191 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 192 if (!tcp_req) { 193 return NULL; 194 } 195 196 assert(tcp_req->state == NVME_TCP_REQ_FREE); 197 tcp_req->state = NVME_TCP_REQ_ACTIVE; 198 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 199 tcp_req->datao = 0; 200 tcp_req->expected_datao = 0; 201 tcp_req->req = NULL; 202 tcp_req->in_capsule_data = false; 203 tcp_req->r2tl_remain = 0; 204 tcp_req->r2tl_remain_next = 0; 205 tcp_req->active_r2ts = 0; 206 tcp_req->iovcnt = 0; 207 tcp_req->ordering.raw = 0; 208 memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu)); 209 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 210 211 return tcp_req; 212 } 213 214 static void 215 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 216 { 217 assert(tcp_req->state != NVME_TCP_REQ_FREE); 218 tcp_req->state = NVME_TCP_REQ_FREE; 219 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 220 } 221 222 static int 223 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 224 { 225 struct addrinfo *res; 226 struct addrinfo hints; 227 int ret; 228 229 
memset(&hints, 0, sizeof(hints)); 230 hints.ai_family = family; 231 hints.ai_socktype = SOCK_STREAM; 232 hints.ai_protocol = 0; 233 234 ret = getaddrinfo(addr, service, &hints, &res); 235 if (ret) { 236 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 237 return -(abs(ret)); 238 } 239 240 if (res->ai_addrlen > sizeof(*sa)) { 241 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 242 ret = -EINVAL; 243 } else { 244 memcpy(sa, res->ai_addr, res->ai_addrlen); 245 } 246 247 freeaddrinfo(res); 248 return ret; 249 } 250 251 static void 252 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 253 { 254 free(tqpair->tcp_reqs); 255 tqpair->tcp_reqs = NULL; 256 257 spdk_free(tqpair->send_pdus); 258 tqpair->send_pdus = NULL; 259 } 260 261 static int 262 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 263 { 264 uint16_t i; 265 struct nvme_tcp_req *tcp_req; 266 267 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 268 if (tqpair->tcp_reqs == NULL) { 269 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 270 goto fail; 271 } 272 273 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 274 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 275 0x1000, NULL, 276 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 277 278 if (tqpair->send_pdus == NULL) { 279 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 280 goto fail; 281 } 282 283 TAILQ_INIT(&tqpair->send_queue); 284 TAILQ_INIT(&tqpair->free_reqs); 285 TAILQ_INIT(&tqpair->outstanding_reqs); 286 for (i = 0; i < tqpair->num_entries; i++) { 287 tcp_req = &tqpair->tcp_reqs[i]; 288 tcp_req->cid = i; 289 tcp_req->tqpair = tqpair; 290 tcp_req->pdu = &tqpair->send_pdus[i]; 291 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 292 } 293 294 tqpair->send_pdu = &tqpair->send_pdus[i]; 295 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 296 297 return 0; 298 fail: 299 nvme_tcp_free_reqs(tqpair); 300 return -ENOMEM; 301 } 302 303 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 304 305 static void 306 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 307 { 308 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 309 struct nvme_tcp_pdu *pdu; 310 int rc; 311 struct nvme_tcp_poll_group *group; 312 313 if (tqpair->needs_poll) { 314 group = nvme_tcp_poll_group(qpair->poll_group); 315 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 316 tqpair->needs_poll = false; 317 } 318 319 rc = spdk_sock_close(&tqpair->sock); 320 321 if (tqpair->sock != NULL) { 322 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 323 /* Set it to NULL manually */ 324 tqpair->sock = NULL; 325 } 326 327 /* clear the send_queue */ 328 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 329 pdu = TAILQ_FIRST(&tqpair->send_queue); 330 /* Remove the pdu from the send_queue to prevent the wrong sending out 331 * in the next round connection 332 */ 333 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 334 } 335 336 nvme_tcp_qpair_abort_reqs(qpair, 0); 337 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 338 } 339 340 static int 341 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 342 { 343 struct nvme_tcp_qpair *tqpair; 344 345 assert(qpair != NULL); 346 nvme_tcp_qpair_abort_reqs(qpair, 0); 347 nvme_qpair_deinit(qpair); 348 tqpair = nvme_tcp_qpair(qpair); 349 nvme_tcp_free_reqs(tqpair); 350 if 
(!tqpair->shared_stats) { 351 free(tqpair->stats); 352 } 353 free(tqpair); 354 355 return 0; 356 } 357 358 static int 359 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 360 { 361 return 0; 362 } 363 364 static int 365 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 366 { 367 struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr); 368 369 if (ctrlr->adminq) { 370 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 371 } 372 373 nvme_ctrlr_destruct_finish(ctrlr); 374 375 free(tctrlr); 376 377 return 0; 378 } 379 380 static void 381 _pdu_write_done(void *cb_arg, int err) 382 { 383 struct nvme_tcp_pdu *pdu = cb_arg; 384 struct nvme_tcp_qpair *tqpair = pdu->qpair; 385 struct nvme_tcp_poll_group *pgroup; 386 387 /* If there are queued requests, we assume they are queued because they are waiting 388 * for resources to be released. Those resources are almost certainly released in 389 * response to a PDU completing here. However, to attempt to make forward progress 390 * the qpair needs to be polled and we can't rely on another network event to make 391 * that happen. Add it to a list of qpairs to poll regardless of network activity 392 * here. 393 * Besides, when tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or 394 * NVME_TCP_QPAIR_STATE_INITIALIZING, need to add it to needs_poll list too to make 395 * forward progress in case that the resources are released after icreq's or CONNECT's 396 * resp is processed. */ 397 if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) || 398 tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL || 399 tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 400 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 401 402 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 403 tqpair->needs_poll = true; 404 } 405 406 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 407 408 if (err != 0) { 409 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 410 return; 411 } 412 413 assert(pdu->cb_fn != NULL); 414 pdu->cb_fn(pdu->cb_arg); 415 } 416 417 static void 418 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 419 { 420 uint32_t mapped_length = 0; 421 struct nvme_tcp_qpair *tqpair = pdu->qpair; 422 423 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, 424 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 425 &mapped_length); 426 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 427 if (spdk_unlikely(mapped_length < pdu->data_len)) { 428 SPDK_ERRLOG("could not map the whole %u bytes (mapped only %u bytes)\n", pdu->data_len, 429 mapped_length); 430 _pdu_write_done(pdu, -EINVAL); 431 return; 432 } 433 pdu->sock_req.cb_fn = _pdu_write_done; 434 pdu->sock_req.cb_arg = pdu; 435 tqpair->stats->submitted_requests++; 436 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 437 } 438 439 static void 440 data_crc32_accel_done(void *cb_arg, int status) 441 { 442 struct nvme_tcp_pdu *pdu = cb_arg; 443 444 if (spdk_unlikely(status)) { 445 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 446 _pdu_write_done(pdu, status); 447 return; 448 } 449 450 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 451 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 452 453 _tcp_write_pdu(pdu); 454 } 455 456 static void 457 pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu) 458 { 459 struct nvme_tcp_qpair *tqpair = pdu->qpair; 460 uint32_t crc32c; 461 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 
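/* The data digest is a CRC32C over the PDU DATA field, finalized by XOR with SPDK_CRC32C_XOR and stored with MAKE_DIGEST_WORD. If the qpair is connected, its poll group provides submit_accel_crc32c, and the payload has no DIF context and is digest-aligned, the CRC is offloaded and the write resumes in data_crc32_accel_done(); otherwise it is computed synchronously below. */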
462 463 /* Data Digest */ 464 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 465 tqpair->flags.host_ddgst_enable) { 466 /* Only support this limited case for the first step */ 467 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 468 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 469 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0))) { 470 tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 471 &pdu->data_digest_crc32, pdu->data_iov, 472 pdu->data_iovcnt, 0, data_crc32_accel_done, pdu); 473 return; 474 } 475 476 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 477 crc32c = crc32c ^ SPDK_CRC32C_XOR; 478 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 479 } 480 481 _tcp_write_pdu(pdu); 482 } 483 484 static int 485 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 486 struct nvme_tcp_pdu *pdu, 487 nvme_tcp_qpair_xfer_complete_cb cb_fn, 488 void *cb_arg) 489 { 490 int hlen; 491 uint32_t crc32c; 492 493 hlen = pdu->hdr.common.hlen; 494 pdu->cb_fn = cb_fn; 495 pdu->cb_arg = cb_arg; 496 pdu->qpair = tqpair; 497 498 /* Header Digest */ 499 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 500 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 501 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 502 } 503 504 pdu_data_crc32_compute(pdu); 505 506 return 0; 507 } 508 509 /* 510 * Build SGL describing contiguous payload buffer. 511 */ 512 static int 513 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 514 { 515 struct nvme_request *req = tcp_req->req; 516 517 tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset; 518 tcp_req->iov[0].iov_len = req->payload_size; 519 tcp_req->iovcnt = 1; 520 521 SPDK_DEBUGLOG(nvme, "enter\n"); 522 523 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 524 525 return 0; 526 } 527 528 /* 529 * Build SGL describing scattered payload buffer. 530 */ 531 static int 532 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 533 { 534 int rc; 535 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 536 struct nvme_request *req = tcp_req->req; 537 538 SPDK_DEBUGLOG(nvme, "enter\n"); 539 540 assert(req->payload_size != 0); 541 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 542 assert(req->payload.reset_sgl_fn != NULL); 543 assert(req->payload.next_sge_fn != NULL); 544 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 545 546 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 547 remaining_size = req->payload_size; 548 549 do { 550 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 551 &length); 552 if (rc) { 553 return -1; 554 } 555 556 length = spdk_min(length, remaining_size); 557 tcp_req->iov[iovcnt].iov_len = length; 558 remaining_size -= length; 559 iovcnt++; 560 } while (remaining_size > 0 && iovcnt < max_num_sgl); 561 562 563 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. 
*/ 564 if (remaining_size > 0) { 565 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 566 tcp_req, iovcnt, remaining_size); 567 return -1; 568 } 569 570 tcp_req->iovcnt = iovcnt; 571 572 return 0; 573 } 574 575 static int 576 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 577 struct nvme_tcp_req *tcp_req) 578 { 579 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 580 int rc = 0; 581 enum spdk_nvme_data_transfer xfer; 582 uint32_t max_in_capsule_data_size; 583 584 tcp_req->req = req; 585 req->cmd.cid = tcp_req->cid; 586 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 587 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 588 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 589 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 590 591 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 592 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 593 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 594 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 595 } else { 596 rc = -1; 597 } 598 599 if (rc) { 600 return rc; 601 } 602 603 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 604 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 605 606 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 607 } else { 608 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 609 } 610 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 611 max_in_capsule_data_size = ctrlr->ioccsz_bytes; 612 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 613 max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 614 } 615 616 if (req->payload_size <= max_in_capsule_data_size) { 617 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 618 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 619 req->cmd.dptr.sgl1.address = 0; 620 tcp_req->in_capsule_data = true; 621 } 622 } 623 624 return 0; 625 } 626 627 static inline bool 628 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 629 { 630 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 631 return false; 632 } 633 634 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 635 assert(tcp_req->tqpair != NULL); 636 assert(tcp_req->req != NULL); 637 638 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 639 640 if (!tcp_req->tqpair->qpair.in_completion_context) { 641 tcp_req->tqpair->async_complete++; 642 } 643 644 nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true); 645 return true; 646 } 647 648 static void 649 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 650 { 651 struct nvme_tcp_req *tcp_req = cb_arg; 652 653 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 654 tcp_req->tqpair->qpair.id); 655 tcp_req->ordering.bits.send_ack = 1; 656 /* Handle the r2t case */ 657 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 658 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 659 nvme_tcp_send_h2c_data(tcp_req); 660 } else { 661 nvme_tcp_req_complete_safe(tcp_req); 662 } 663 } 664 665 static int 666 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 667 struct nvme_tcp_req *tcp_req) 668 { 669 struct nvme_tcp_pdu *pdu; 670 struct spdk_nvme_tcp_cmd *capsule_cmd; 671 uint32_t plen = 0, alignment; 672 uint8_t pdo; 673 674 SPDK_DEBUGLOG(nvme, "enter\n"); 675 pdu = tcp_req->pdu; 676 677 
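/* Lay out the capsule command PDU: the 8-byte common header plus the 64-byte SQE (72 bytes total), an optional 4-byte header digest, padding so that in-capsule data starts at the PDO required by the controller's CPDA, the in-capsule data itself, and an optional 4-byte data digest. For example, with CPDA = 31 the alignment is (31 + 1) << 2 = 128, so a 76-byte header (72 + HDGST) is padded and the data begins at offset 128. */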
capsule_cmd = &pdu->hdr.capsule_cmd; 678 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 679 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 680 capsule_cmd->ccsqe = tcp_req->req->cmd; 681 682 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 683 684 if (tqpair->flags.host_hdgst_enable) { 685 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 686 tcp_req); 687 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 688 plen += SPDK_NVME_TCP_DIGEST_LEN; 689 } 690 691 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 692 goto end; 693 } 694 695 pdo = plen; 696 pdu->padding_len = 0; 697 if (tqpair->cpda) { 698 alignment = (tqpair->cpda + 1) << 2; 699 if (alignment > plen) { 700 pdu->padding_len = alignment - plen; 701 pdo = alignment; 702 plen = alignment; 703 } 704 } 705 706 capsule_cmd->common.pdo = pdo; 707 plen += tcp_req->req->payload_size; 708 if (tqpair->flags.host_ddgst_enable) { 709 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 710 plen += SPDK_NVME_TCP_DIGEST_LEN; 711 } 712 713 tcp_req->datao = 0; 714 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 715 0, tcp_req->req->payload_size); 716 end: 717 capsule_cmd->common.plen = plen; 718 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 719 720 } 721 722 static int 723 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 724 struct nvme_request *req) 725 { 726 struct nvme_tcp_qpair *tqpair; 727 struct nvme_tcp_req *tcp_req; 728 729 tqpair = nvme_tcp_qpair(qpair); 730 assert(tqpair != NULL); 731 assert(req != NULL); 732 733 tcp_req = nvme_tcp_req_get(tqpair); 734 if (!tcp_req) { 735 tqpair->stats->queued_requests++; 736 /* Inform the upper layer to try again later. 
*/ 737 return -EAGAIN; 738 } 739 740 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 741 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 742 nvme_tcp_req_put(tqpair, tcp_req); 743 return -1; 744 } 745 746 spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg, 747 (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc, 748 req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12); 749 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 750 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 751 } 752 753 static int 754 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 755 { 756 return 0; 757 } 758 759 static void 760 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 761 struct nvme_tcp_qpair *tqpair, 762 struct spdk_nvme_cpl *rsp, 763 bool print_on_error) 764 { 765 struct spdk_nvme_cpl cpl; 766 spdk_nvme_cmd_cb user_cb; 767 void *user_cb_arg; 768 struct spdk_nvme_qpair *qpair; 769 struct nvme_request *req; 770 bool error, print_error; 771 772 assert(tcp_req->req != NULL); 773 req = tcp_req->req; 774 775 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 776 memcpy(&cpl, rsp, sizeof(cpl)); 777 user_cb = req->cb_fn; 778 user_cb_arg = req->cb_arg; 779 qpair = req->qpair; 780 781 error = spdk_nvme_cpl_is_error(rsp); 782 print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 783 784 if (print_error) { 785 spdk_nvme_qpair_print_command(qpair, &req->cmd); 786 } 787 788 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 789 spdk_nvme_qpair_print_completion(qpair, rsp); 790 } 791 792 spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg, 793 (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw); 794 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 795 nvme_tcp_req_put(tqpair, tcp_req); 796 nvme_free_request(req); 797 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 798 } 799 800 static void 801 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 802 { 803 struct nvme_tcp_req *tcp_req, *tmp; 804 struct spdk_nvme_cpl cpl = {}; 805 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 806 807 cpl.sqid = qpair->id; 808 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 809 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 810 cpl.status.dnr = dnr; 811 812 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 813 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true); 814 } 815 } 816 817 static inline void 818 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 819 enum nvme_tcp_pdu_recv_state state) 820 { 821 if (tqpair->recv_state == state) { 822 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 823 tqpair, state); 824 return; 825 } 826 827 if (state == NVME_TCP_PDU_RECV_STATE_ERROR) { 828 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs)); 829 } 830 831 tqpair->recv_state = state; 832 } 833 834 static void 835 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 836 { 837 struct nvme_tcp_qpair *tqpair = cb_arg; 838 839 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 840 } 841 842 static void 843 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 844 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 845 { 846 struct nvme_tcp_pdu *rsp_pdu; 847 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 848 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 849 uint8_t copy_len; 850 851 rsp_pdu = tqpair->send_pdu; 852 memset(rsp_pdu, 0, 
sizeof(*rsp_pdu)); 853 h2c_term_req = &rsp_pdu->hdr.term_req; 854 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 855 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 856 857 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 858 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 859 DSET32(&h2c_term_req->fei, error_offset); 860 } 861 862 copy_len = pdu->hdr.common.hlen; 863 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 864 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 865 } 866 867 /* Copy the error info into the buffer */ 868 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 869 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 870 871 /* Contain the header len of the wrong received pdu */ 872 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 873 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 874 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair); 875 } 876 877 static bool 878 nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair) 879 { 880 switch (tqpair->state) { 881 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 882 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 883 case NVME_TCP_QPAIR_STATE_RUNNING: 884 return true; 885 default: 886 return false; 887 } 888 } 889 890 static void 891 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 892 { 893 struct nvme_tcp_pdu *pdu; 894 uint32_t error_offset = 0; 895 enum spdk_nvme_tcp_term_req_fes fes; 896 uint32_t expected_hlen, hd_len = 0; 897 bool plen_error = false; 898 899 pdu = tqpair->recv_pdu; 900 901 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 902 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 903 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 904 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 905 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 906 goto err; 907 } 908 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 909 if (pdu->hdr.common.plen != expected_hlen) { 910 plen_error = true; 911 } 912 } else { 913 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) { 914 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n"); 915 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 916 goto err; 917 } 918 919 switch (pdu->hdr.common.pdu_type) { 920 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 921 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 922 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 923 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 924 } 925 926 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 927 plen_error = true; 928 } 929 break; 930 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 931 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 932 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 933 plen_error = true; 934 } 935 break; 936 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 937 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 938 if ((pdu->hdr.common.plen <= expected_hlen) || 939 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 940 plen_error = true; 941 } 942 break; 943 case SPDK_NVME_TCP_PDU_TYPE_R2T: 944 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 945 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 946 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 947 } 948 949 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 950 
plen_error = true; 951 } 952 break; 953 954 default: 955 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 956 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 957 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 958 goto err; 959 } 960 } 961 962 if (pdu->hdr.common.hlen != expected_hlen) { 963 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 964 expected_hlen, pdu->hdr.common.hlen); 965 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 966 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 967 goto err; 968 969 } else if (plen_error) { 970 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 971 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 972 goto err; 973 } else { 974 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 975 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 976 return; 977 } 978 err: 979 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 980 } 981 982 static struct nvme_tcp_req * 983 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 984 { 985 assert(tqpair != NULL); 986 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 987 return NULL; 988 } 989 990 return &tqpair->tcp_reqs[cid]; 991 } 992 993 static void 994 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 995 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 996 { 997 struct nvme_tcp_req *tcp_req; 998 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 999 uint8_t flags; 1000 1001 tcp_req = pdu->req; 1002 assert(tcp_req != NULL); 1003 1004 SPDK_DEBUGLOG(nvme, "enter\n"); 1005 c2h_data = &pdu->hdr.c2h_data; 1006 tcp_req->datao += pdu->data_len; 1007 flags = c2h_data->common.flags; 1008 1009 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) { 1010 if (tcp_req->datao == tcp_req->req->payload_size) { 1011 tcp_req->rsp.status.p = 0; 1012 } else { 1013 tcp_req->rsp.status.p = 1; 1014 } 1015 1016 tcp_req->rsp.cid = tcp_req->cid; 1017 tcp_req->rsp.sqid = tqpair->qpair.id; 1018 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 1019 tcp_req->ordering.bits.data_recv = 1; 1020 if (nvme_tcp_req_complete_safe(tcp_req)) { 1021 (*reaped)++; 1022 } 1023 } 1024 } 1025 } 1026 1027 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 1028 "Invalid PDU Header Field", 1029 "PDU Sequence Error", 1030 "Header Digest Error", 1031 "Data Transfer Out of Range", 1032 "Data Transfer Limit Exceeded", 1033 "Unsupported parameter", 1034 }; 1035 1036 static void 1037 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 1038 { 1039 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 1040 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 1041 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1042 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1043 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 1044 DGET32(c2h_term_req->fei)); 1045 } 1046 /* we may also need to dump some other info here */ 1047 } 1048 1049 static void 1050 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 1051 struct nvme_tcp_pdu *pdu) 1052 { 1053 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 1054 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1055 } 1056 1057 static void 1058 _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1059 { 1060 
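/* Invoked once the full payload of the current PDU has been received (and the data digest, if enabled, has been verified); dispatches on the PDU type. */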
struct nvme_tcp_pdu *pdu; 1061 1062 assert(tqpair != NULL); 1063 pdu = tqpair->recv_pdu; 1064 1065 switch (pdu->hdr.common.pdu_type) { 1066 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1067 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 1068 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1069 break; 1070 1071 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1072 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 1073 break; 1074 1075 default: 1076 /* The code should never reach here */ 1077 SPDK_ERRLOG("The code should never reach here\n"); 1078 break; 1079 } 1080 } 1081 1082 static void 1083 tcp_data_recv_crc32_done(void *cb_arg, int status) 1084 { 1085 struct nvme_tcp_req *tcp_req = cb_arg; 1086 struct nvme_tcp_pdu *pdu; 1087 struct nvme_tcp_qpair *tqpair; 1088 int rc; 1089 struct nvme_tcp_poll_group *pgroup; 1090 int dummy_reaped = 0; 1091 1092 pdu = tcp_req->pdu; 1093 assert(pdu != NULL); 1094 1095 tqpair = tcp_req->tqpair; 1096 assert(tqpair != NULL); 1097 1098 if (tqpair->qpair.poll_group && !tqpair->needs_poll) { 1099 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1100 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 1101 tqpair->needs_poll = true; 1102 } 1103 1104 if (spdk_unlikely(status)) { 1105 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1106 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1107 goto end; 1108 } 1109 1110 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1111 rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1112 if (rc == 0) { 1113 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1114 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1115 } 1116 1117 end: 1118 nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped); 1119 } 1120 1121 static void 1122 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 1123 uint32_t *reaped) 1124 { 1125 int rc = 0; 1126 struct nvme_tcp_pdu *pdu = tqpair->recv_pdu; 1127 uint32_t crc32c; 1128 struct nvme_tcp_poll_group *tgroup; 1129 struct nvme_tcp_req *tcp_req = pdu->req; 1130 1131 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1132 SPDK_DEBUGLOG(nvme, "enter\n"); 1133 1134 /* The request can be NULL, e.g.
in case of C2HTermReq */ 1135 if (spdk_likely(tcp_req != NULL)) { 1136 tcp_req->expected_datao += pdu->data_len; 1137 } 1138 1139 /* Check the data digest if needed */ 1140 if (pdu->ddgst_enable) { 1141 /* If the data digest is enabled, tcp_req cannot be NULL */ 1142 assert(tcp_req != NULL); 1143 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1144 /* Only support the limited case where the request has a single C2H PDU */ 1145 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 1146 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 1147 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0) 1148 && tcp_req->req->payload_size == pdu->data_len)) { 1149 tcp_req->pdu->hdr = pdu->hdr; 1150 tcp_req->pdu->req = tcp_req; 1151 memcpy(tcp_req->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest)); 1152 memcpy(tcp_req->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt); 1153 tcp_req->pdu->data_iovcnt = pdu->data_iovcnt; 1154 tcp_req->pdu->data_len = pdu->data_len; 1155 1156 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1157 tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 1158 &tcp_req->pdu->data_digest_crc32, tcp_req->pdu->data_iov, 1159 tcp_req->pdu->data_iovcnt, 0, tcp_data_recv_crc32_done, tcp_req); 1160 return; 1161 } 1162 1163 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1164 crc32c = crc32c ^ SPDK_CRC32C_XOR; 1165 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1166 if (rc == 0) { 1167 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1168 tcp_req = pdu->req; 1169 assert(tcp_req != NULL); 1170 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1171 } 1172 } 1173 1174 _nvme_tcp_pdu_payload_handle(tqpair, reaped); 1175 } 1176 1177 static void 1178 nvme_tcp_send_icreq_complete(void *cb_arg) 1179 { 1180 struct nvme_tcp_qpair *tqpair = cb_arg; 1181 1182 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id); 1183 1184 tqpair->flags.icreq_send_ack = true; 1185 1186 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 1187 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id); 1188 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1189 } 1190 } 1191 1192 static void 1193 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1194 struct nvme_tcp_pdu *pdu) 1195 { 1196 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1197 uint32_t error_offset = 0; 1198 enum spdk_nvme_tcp_term_req_fes fes; 1199 int recv_buf_size; 1200 1201 /* Only PFV 0 is defined currently */ 1202 if (ic_resp->pfv != 0) { 1203 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1204 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1205 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1206 goto end; 1207 } 1208 1209 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1210 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1211 ic_resp->maxh2cdata); 1212 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1213 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1214 goto end; 1215 } 1216 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1217 1218 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1219 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1220 fes =
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1221 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1222 goto end; 1223 } 1224 tqpair->cpda = ic_resp->cpda; 1225 1226 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1227 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false; 1228 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1229 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1230 1231 /* Now that we know whether digests are enabled, properly size the receive buffer to 1232 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1233 * parameter. */ 1234 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1235 1236 if (tqpair->flags.host_hdgst_enable) { 1237 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1238 } 1239 1240 if (tqpair->flags.host_ddgst_enable) { 1241 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1242 } 1243 1244 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1245 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1246 tqpair, 1247 recv_buf_size); 1248 /* Not fatal. */ 1249 } 1250 1251 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1252 1253 if (!tqpair->flags.icreq_send_ack) { 1254 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1255 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1256 return; 1257 } 1258 1259 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1260 return; 1261 end: 1262 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1263 } 1264 1265 static void 1266 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1267 uint32_t *reaped) 1268 { 1269 struct nvme_tcp_req *tcp_req; 1270 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1271 uint32_t cid, error_offset = 0; 1272 enum spdk_nvme_tcp_term_req_fes fes; 1273 1274 SPDK_DEBUGLOG(nvme, "enter\n"); 1275 cid = capsule_resp->rccqe.cid; 1276 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1277 1278 if (!tcp_req) { 1279 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1280 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1281 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1282 goto end; 1283 } 1284 1285 assert(tcp_req->req != NULL); 1286 1287 tcp_req->rsp = capsule_resp->rccqe; 1288 tcp_req->ordering.bits.data_recv = 1; 1289 1290 /* Recv the pdu again */ 1291 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1292 1293 if (nvme_tcp_req_complete_safe(tcp_req)) { 1294 (*reaped)++; 1295 } 1296 1297 return; 1298 1299 end: 1300 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1301 } 1302 1303 static void 1304 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1305 struct nvme_tcp_pdu *pdu) 1306 { 1307 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1308 uint32_t error_offset = 0; 1309 enum spdk_nvme_tcp_term_req_fes fes; 1310 1311 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1312 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1313 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1314 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1315 goto end; 1316 } 1317 1318 /* set the 
data buffer */ 1319 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1320 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1321 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1322 return; 1323 end: 1324 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1325 } 1326 1327 static void 1328 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1329 { 1330 struct nvme_tcp_req *tcp_req; 1331 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1332 uint32_t error_offset = 0; 1333 enum spdk_nvme_tcp_term_req_fes fes; 1334 int flags = c2h_data->common.flags; 1335 1336 SPDK_DEBUGLOG(nvme, "enter\n"); 1337 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1338 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1339 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1340 if (!tcp_req) { 1341 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1342 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1343 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1344 goto end; 1345 1346 } 1347 1348 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n", 1349 tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size); 1350 1351 if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) && 1352 !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) { 1353 SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data); 1354 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1355 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common); 1356 goto end; 1357 } 1358 1359 if (c2h_data->datal > tcp_req->req->payload_size) { 1360 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1361 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1362 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1363 goto end; 1364 } 1365 1366 if (tcp_req->expected_datao != c2h_data->datao) { 1367 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n", 1368 tcp_req, c2h_data->datao, tcp_req->expected_datao); 1369 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1370 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1371 goto end; 1372 } 1373 1374 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1375 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1376 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1377 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1378 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1379 goto end; 1380 1381 } 1382 1383 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1384 c2h_data->datao, c2h_data->datal); 1385 pdu->req = tcp_req; 1386 1387 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1388 return; 1389 1390 end: 1391 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1392 } 1393 1394 static void 1395 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1396 { 1397 struct nvme_tcp_req *tcp_req = cb_arg; 1398 1399 assert(tcp_req != NULL); 1400 1401 tcp_req->ordering.bits.send_ack = 1; 1402 if (tcp_req->r2tl_remain) { 1403 nvme_tcp_send_h2c_data(tcp_req); 1404 } else { 1405 assert(tcp_req->active_r2ts > 0); 1406 
tcp_req->active_r2ts--; 1407 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1408 1409 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1410 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1411 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1412 assert(tcp_req->active_r2ts > 0); 1413 tcp_req->ttag = tcp_req->ttag_r2t_next; 1414 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1415 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1416 nvme_tcp_send_h2c_data(tcp_req); 1417 return; 1418 } 1419 1420 /* Need also call this function to free the resource */ 1421 nvme_tcp_req_complete_safe(tcp_req); 1422 } 1423 } 1424 1425 static void 1426 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1427 { 1428 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1429 struct nvme_tcp_pdu *rsp_pdu; 1430 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1431 uint32_t plen, pdo, alignment; 1432 1433 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1434 tcp_req->ordering.bits.send_ack = 0; 1435 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1436 rsp_pdu = tcp_req->pdu; 1437 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1438 h2c_data = &rsp_pdu->hdr.h2c_data; 1439 1440 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1441 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1442 h2c_data->cccid = tcp_req->cid; 1443 h2c_data->ttag = tcp_req->ttag; 1444 h2c_data->datao = tcp_req->datao; 1445 1446 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1447 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1448 h2c_data->datao, h2c_data->datal); 1449 tcp_req->r2tl_remain -= h2c_data->datal; 1450 1451 if (tqpair->flags.host_hdgst_enable) { 1452 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1453 plen += SPDK_NVME_TCP_DIGEST_LEN; 1454 } 1455 1456 rsp_pdu->padding_len = 0; 1457 pdo = plen; 1458 if (tqpair->cpda) { 1459 alignment = (tqpair->cpda + 1) << 2; 1460 if (alignment > plen) { 1461 rsp_pdu->padding_len = alignment - plen; 1462 pdo = plen = alignment; 1463 } 1464 } 1465 1466 h2c_data->common.pdo = pdo; 1467 plen += h2c_data->datal; 1468 if (tqpair->flags.host_ddgst_enable) { 1469 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1470 plen += SPDK_NVME_TCP_DIGEST_LEN; 1471 } 1472 1473 h2c_data->common.plen = plen; 1474 tcp_req->datao += h2c_data->datal; 1475 if (!tcp_req->r2tl_remain) { 1476 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1477 } 1478 1479 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1480 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1481 1482 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1483 } 1484 1485 static void 1486 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1487 { 1488 struct nvme_tcp_req *tcp_req; 1489 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1490 uint32_t cid, error_offset = 0; 1491 enum spdk_nvme_tcp_term_req_fes fes; 1492 1493 SPDK_DEBUGLOG(nvme, "enter\n"); 1494 cid = r2t->cccid; 1495 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1496 if (!tcp_req) { 1497 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1498 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1499 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1500 goto end; 1501 } 1502 1503 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1504 tqpair); 1505 1506 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) { 1507 
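/* First R2T for this request: no H2C transfer is outstanding yet (asserted below), so transition to ACTIVE_R2T before accounting for the new R2T. */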
assert(tcp_req->active_r2ts == 0); 1508 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1509 } 1510 1511 if (tcp_req->datao != r2t->r2to) { 1512 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1513 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to); 1514 goto end; 1515 1516 } 1517 1518 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) { 1519 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n", 1520 tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size); 1521 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1522 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl); 1523 goto end; 1524 } 1525 1526 tcp_req->active_r2ts++; 1527 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) { 1528 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) { 1529 /* We received a subsequent R2T while we are still waiting for the H2C transfer to complete */ 1530 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n"); 1531 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1); 1532 tcp_req->ttag_r2t_next = r2t->ttag; 1533 tcp_req->r2tl_remain_next = r2t->r2tl; 1534 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1; 1535 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1536 return; 1537 } else { 1538 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1539 SPDK_ERRLOG("Invalid R2T: Maximum number of R2Ts exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t, 1540 tqpair); 1541 goto end; 1542 } 1543 } 1544 1545 tcp_req->ttag = r2t->ttag; 1546 tcp_req->r2tl_remain = r2t->r2tl; 1547 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1548 1549 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1550 nvme_tcp_send_h2c_data(tcp_req); 1551 } else { 1552 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1553 } 1554 1555 return; 1556 1557 end: 1558 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1559 1560 } 1561 1562 static void 1563 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1564 { 1565 struct nvme_tcp_pdu *pdu; 1566 int rc; 1567 uint32_t crc32c, error_offset = 0; 1568 enum spdk_nvme_tcp_term_req_fes fes; 1569 1570 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1571 pdu = tqpair->recv_pdu; 1572 1573 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1574 /* check header digest if needed */ 1575 if (pdu->has_hdgst) { 1576 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1577 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1578 if (rc == 0) { 1579 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1580 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1581 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1582 return; 1583 1584 } 1585 } 1586 1587 switch (pdu->hdr.common.pdu_type) { 1588 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1589 nvme_tcp_icresp_handle(tqpair, pdu); 1590 break; 1591 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1592 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1593 break; 1594 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1595 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1596 break; 1597 1598 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1599 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1600 break; 1601 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1602 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1603 break; 1604 1605 default: 1606 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 1607 fes =
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1608 error_offset = 1; 1609 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1610 break; 1611 } 1612 1613 } 1614 1615 static int 1616 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions) 1617 { 1618 int rc = 0; 1619 struct nvme_tcp_pdu *pdu; 1620 uint32_t data_len; 1621 enum nvme_tcp_pdu_recv_state prev_state; 1622 1623 *reaped = tqpair->async_complete; 1624 tqpair->async_complete = 0; 1625 1626 /* The loop here is to allow for several back-to-back state changes. */ 1627 do { 1628 if (*reaped >= max_completions) { 1629 break; 1630 } 1631 1632 prev_state = tqpair->recv_state; 1633 pdu = tqpair->recv_pdu; 1634 switch (tqpair->recv_state) { 1635 /* If in a new state */ 1636 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1637 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 1638 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1639 break; 1640 /* Wait for the pdu common header */ 1641 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1642 assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)); 1643 rc = nvme_tcp_read_data(tqpair->sock, 1644 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1645 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1646 if (rc < 0) { 1647 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1648 break; 1649 } 1650 pdu->ch_valid_bytes += rc; 1651 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1652 return NVME_TCP_PDU_IN_PROGRESS; 1653 } 1654 1655 /* The command header of this PDU has now been read from the socket. */ 1656 nvme_tcp_pdu_ch_handle(tqpair); 1657 break; 1658 /* Wait for the pdu specific header */ 1659 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1660 assert(pdu->psh_valid_bytes < pdu->psh_len); 1661 rc = nvme_tcp_read_data(tqpair->sock, 1662 pdu->psh_len - pdu->psh_valid_bytes, 1663 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1664 if (rc < 0) { 1665 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1666 break; 1667 } 1668 1669 pdu->psh_valid_bytes += rc; 1670 if (pdu->psh_valid_bytes < pdu->psh_len) { 1671 return NVME_TCP_PDU_IN_PROGRESS; 1672 } 1673 1674 /* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */ 1675 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1676 break; 1677 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1678 /* Check whether the data is valid; if not, just return */ 1679 if (!pdu->data_len) { 1680 return NVME_TCP_PDU_IN_PROGRESS; 1681 } 1682 1683 data_len = pdu->data_len; 1684 /* data digest */ 1685 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1686 tqpair->flags.host_ddgst_enable)) { 1687 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1688 pdu->ddgst_enable = true; 1689 } 1690 1691 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1692 if (rc < 0) { 1693 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING); 1694 break; 1695 } 1696 1697 pdu->rw_offset += rc; 1698 if (pdu->rw_offset < data_len) { 1699 return NVME_TCP_PDU_IN_PROGRESS; 1700 } 1701 1702 assert(pdu->rw_offset == data_len); 1703 /* All of this PDU has now been read from the socket.
*/ 1704 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1705 break; 1706 case NVME_TCP_PDU_RECV_STATE_QUIESCING: 1707 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) { 1708 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1709 } 1710 break; 1711 case NVME_TCP_PDU_RECV_STATE_ERROR: 1712 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 1713 return NVME_TCP_PDU_FATAL; 1714 default: 1715 assert(0); 1716 break; 1717 } 1718 } while (prev_state != tqpair->recv_state); 1719 1720 return rc > 0 ? 0 : rc; 1721 } 1722 1723 static void 1724 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1725 { 1726 uint64_t t02; 1727 struct nvme_tcp_req *tcp_req, *tmp; 1728 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1729 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1730 struct spdk_nvme_ctrlr_process *active_proc; 1731 1732 /* Don't check timeouts during controller initialization. */ 1733 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1734 return; 1735 } 1736 1737 if (nvme_qpair_is_admin_queue(qpair)) { 1738 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1739 } else { 1740 active_proc = qpair->active_proc; 1741 } 1742 1743 /* Only check timeouts if the current process has a timeout callback. */ 1744 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1745 return; 1746 } 1747 1748 t02 = spdk_get_ticks(); 1749 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1750 assert(tcp_req->req != NULL); 1751 1752 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1753 /* 1754 * The requests are in order, so as soon as one has not timed out, 1755 * stop iterating. 1756 */ 1757 break; 1758 } 1759 } 1760 } 1761 1762 static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, 1763 struct spdk_nvme_qpair *qpair); 1764 1765 static int 1766 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1767 { 1768 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1769 uint32_t reaped; 1770 int rc; 1771 1772 if (qpair->poll_group == NULL) { 1773 rc = spdk_sock_flush(tqpair->sock); 1774 if (rc < 0 && errno != EAGAIN) { 1775 SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair, 1776 errno, spdk_strerror(errno)); 1777 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1778 nvme_tcp_qpair_check_timeout(qpair); 1779 } 1780 goto fail; 1781 } 1782 } 1783 1784 if (max_completions == 0) { 1785 max_completions = spdk_max(tqpair->num_entries, 1); 1786 } else { 1787 max_completions = spdk_min(max_completions, tqpair->num_entries); 1788 } 1789 1790 reaped = 0; 1791 rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions); 1792 if (rc < 0) { 1793 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1794 errno, spdk_strerror(errno)); 1795 goto fail; 1796 } 1797 1798 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1799 nvme_tcp_qpair_check_timeout(qpair); 1800 } 1801 1802 if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 1803 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair); 1804 if (rc != 0 && rc != -EAGAIN) { 1805 SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair); 1806 goto fail; 1807 } else if (rc == 0) { 1808 /* Once the connection is completed, we can submit queued requests */ 1809 nvme_qpair_resubmit_requests(qpair, tqpair->num_entries); 1810 } 1811 } 1812 1813 return reaped; 1814 fail: 1815 1816 /* 1817 * Since admin queues take the ctrlr_lock before entering this function, 1818 * we can call nvme_transport_ctrlr_disconnect_qpair. 
static void
nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvme_qpair *qpair = ctx;
	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
	int32_t num_completions;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);

	if (pgroup->num_completions >= 0 && num_completions >= 0) {
		pgroup->num_completions += num_completions;
		pgroup->stats.nvme_completions += num_completions;
	} else {
		pgroup->num_completions = -ENXIO;
	}
}

static int
nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
{
	struct spdk_nvme_tcp_ic_req *ic_req;
	struct nvme_tcp_pdu *pdu;
	uint32_t timeout_in_sec;

	pdu = tqpair->send_pdu;
	memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
	ic_req = &pdu->hdr.ic_req;

	ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
	ic_req->pfv = 0;
	ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
	ic_req->hpda = NVME_TCP_HPDA_DEFAULT;

	ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
	ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;

	nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);

	timeout_in_sec = tqpair->qpair.async ? ICREQ_TIMEOUT_ASYNC : ICREQ_TIMEOUT_SYNC;
	tqpair->icreq_timeout_tsc = spdk_get_ticks() + (timeout_in_sec * spdk_get_ticks_hz());
	return 0;
}
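/*
 * The ICReq deadline set above is expressed in TSC ticks:
 *
 *	icreq_timeout_tsc = spdk_get_ticks() + timeout_in_sec * spdk_get_ticks_hz();
 *
 * For example, with a hypothetical 2 GHz tick rate and the 2 second
 * synchronous timeout, the deadline lands roughly 4 billion ticks past "now".
 * nvme_tcp_ctrlr_connect_qpair_poll() compares spdk_get_ticks() against this
 * value while waiting for the target's ICResp.
 */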
static int
nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	int rc;
	struct nvme_tcp_qpair *tqpair;
	int family;
	long int port;
	char *sock_impl_name;
	struct spdk_sock_impl_opts impl_opts = {};
	size_t impl_opts_size = sizeof(impl_opts);
	struct spdk_sock_opts opts;
	struct nvme_tcp_ctrlr *tcp_ctrlr;

	tqpair = nvme_tcp_qpair(qpair);

	switch (ctrlr->trid.adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		family = AF_INET;
		break;
	case SPDK_NVMF_ADRFAM_IPV6:
		family = AF_INET6;
		break;
	default:
		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
		rc = -1;
		return rc;
	}

	SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

	memset(&dst_addr, 0, sizeof(dst_addr));

	port = spdk_strtol(ctrlr->trid.trsvcid, 10);
	if (port <= 0 || port >= INT_MAX) {
		SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid);
		rc = -1;
		return rc;
	}

	SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
	rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
	if (rc != 0) {
		SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n");
		return rc;
	}

	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
		memset(&src_addr, 0, sizeof(src_addr));
		rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
		if (rc != 0) {
			SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n");
			return rc;
		}
	}

	tcp_ctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
	sock_impl_name = tcp_ctrlr->psk[0] ? "ssl" : NULL;
	SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name);

	if (sock_impl_name) {
		spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);
		impl_opts.enable_ktls = false;
		impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
		impl_opts.psk_identity = tcp_ctrlr->psk_identity;
		impl_opts.psk_key = tcp_ctrlr->psk;
		impl_opts.psk_key_size = tcp_ctrlr->psk_size;
	}
	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ctrlr->trid.priority;
	opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
	if (ctrlr->opts.transport_ack_timeout) {
		opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
	}
	if (sock_impl_name) {
		opts.impl_opts = &impl_opts;
		opts.impl_opts_size = sizeof(impl_opts);
	}
	tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts);
	if (!tqpair->sock) {
		SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
			    tqpair, ctrlr->trid.traddr, port);
		rc = -1;
		return rc;
	}

	return 0;
}
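/*
 * Note on transport_ack_timeout in nvme_tcp_qpair_connect_sock() above: the
 * option is treated as an exponent, so the socket-level ack timeout scales as
 *
 *	opts.ack_timeout = 1ULL << transport_ack_timeout;
 *
 * e.g. a value of 10 yields 1024 in the units used by spdk_sock_opts
 * (milliseconds at the time of writing; treat the unit as an assumption).
 * The cap of NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT (31), applied in
 * nvme_tcp_ctrlr_construct(), keeps the shift well defined.
 */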
"ssl" : NULL; 1940 SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name); 1941 1942 if (sock_impl_name) { 1943 spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size); 1944 impl_opts.enable_ktls = false; 1945 impl_opts.tls_version = SPDK_TLS_VERSION_1_3; 1946 impl_opts.psk_identity = tcp_ctrlr->psk_identity; 1947 impl_opts.psk_key = tcp_ctrlr->psk; 1948 impl_opts.psk_key_size = tcp_ctrlr->psk_size; 1949 } 1950 opts.opts_size = sizeof(opts); 1951 spdk_sock_get_default_opts(&opts); 1952 opts.priority = ctrlr->trid.priority; 1953 opts.zcopy = !nvme_qpair_is_admin_queue(qpair); 1954 if (ctrlr->opts.transport_ack_timeout) { 1955 opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout; 1956 } 1957 if (sock_impl_name) { 1958 opts.impl_opts = &impl_opts; 1959 opts.impl_opts_size = sizeof(impl_opts); 1960 } 1961 tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts); 1962 if (!tqpair->sock) { 1963 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", 1964 tqpair, ctrlr->trid.traddr, port); 1965 rc = -1; 1966 return rc; 1967 } 1968 1969 return 0; 1970 } 1971 1972 static int 1973 nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 1974 { 1975 struct nvme_tcp_qpair *tqpair; 1976 int rc; 1977 1978 tqpair = nvme_tcp_qpair(qpair); 1979 1980 /* Prevent this function from being called recursively, as it could lead to issues with 1981 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive 1982 * call. 1983 */ 1984 if (tqpair->flags.in_connect_poll) { 1985 return -EAGAIN; 1986 } 1987 1988 tqpair->flags.in_connect_poll = 1; 1989 1990 switch (tqpair->state) { 1991 case NVME_TCP_QPAIR_STATE_INVALID: 1992 case NVME_TCP_QPAIR_STATE_INITIALIZING: 1993 if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) { 1994 SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair); 1995 rc = -ETIMEDOUT; 1996 break; 1997 } 1998 rc = -EAGAIN; 1999 break; 2000 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 2001 rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1); 2002 if (rc < 0) { 2003 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); 2004 break; 2005 } 2006 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL; 2007 rc = -EAGAIN; 2008 break; 2009 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 2010 rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair); 2011 if (rc == 0) { 2012 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 2013 nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); 2014 } else if (rc != -EAGAIN) { 2015 SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n"); 2016 } 2017 break; 2018 case NVME_TCP_QPAIR_STATE_RUNNING: 2019 rc = 0; 2020 break; 2021 default: 2022 assert(false); 2023 rc = -EINVAL; 2024 break; 2025 } 2026 2027 tqpair->flags.in_connect_poll = 0; 2028 return rc; 2029 } 2030 2031 static int 2032 nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 2033 { 2034 int rc = 0; 2035 struct nvme_tcp_qpair *tqpair; 2036 struct nvme_tcp_poll_group *tgroup; 2037 2038 tqpair = nvme_tcp_qpair(qpair); 2039 2040 if (!tqpair->sock) { 2041 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair); 2042 if (rc < 0) { 2043 return rc; 2044 } 2045 } 2046 2047 if (qpair->poll_group) { 2048 rc = nvme_poll_group_connect_qpair(qpair); 2049 if (rc) { 2050 SPDK_ERRLOG("Unable to activate the tcp qpair.\n"); 2051 return rc; 2052 } 2053 tgroup = nvme_tcp_poll_group(qpair->poll_group); 2054 
static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
			    uint16_t qid, uint32_t qsize,
			    enum spdk_nvme_qprio qprio,
			    uint32_t num_requests, bool async)
{
	struct nvme_tcp_qpair *tqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
		return NULL;
	}

	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
	if (!tqpair) {
		SPDK_ERRLOG("failed to create tqpair\n");
		return NULL;
	}

	/* Set num_entries to one less than the queue size. Per the NVMe and NVMe-oF
	 * specs we cannot submit 'queue size' requests at once; one slot must always
	 * remain empty.
	 */
	tqpair->num_entries = qsize - 1;
	qpair = &tqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
	if (rc != 0) {
		free(tqpair);
		return NULL;
	}

	rc = nvme_tcp_alloc_reqs(tqpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	/* spdk_nvme_qpair_get_optimal_poll_group needs socket information.
	 * So create the socket first when creating a qpair. */
	rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
			       const struct spdk_nvme_io_qpair_opts *opts)
{
	return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
					   opts->io_queue_requests, opts->async_mode);
}

/* We have to use the typedef in the function declaration to appease astyle. */
typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;
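/*
 * Illustrative usage (hypothetical application code, not part of this
 * transport): I/O qpairs reach nvme_tcp_ctrlr_create_io_qpair() above through
 * the generic allocation API, roughly:
 *
 *	struct spdk_nvme_io_qpair_opts qopts;
 *	struct spdk_nvme_qpair *qp;
 *
 *	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &qopts, sizeof(qopts));
 *	qopts.io_queue_size = 128;	// example value; tqpair->num_entries becomes 127
 *	qp = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &qopts, sizeof(qopts));
 *
 * The "minus one" reflects the NVMe rule that a full queue keeps one slot empty.
 */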
static int
nvme_tcp_generate_tls_credentials(struct nvme_tcp_ctrlr *tctrlr)
{
	int rc;
	uint8_t psk_retained[SPDK_TLS_PSK_MAX_LEN] = {};

	assert(tctrlr != NULL);

	rc = nvme_tcp_generate_psk_identity(tctrlr->psk_identity, sizeof(tctrlr->psk_identity),
					    tctrlr->ctrlr.opts.hostnqn, tctrlr->ctrlr.trid.subnqn);
	if (rc) {
		SPDK_ERRLOG("could not generate PSK identity\n");
		return -EINVAL;
	}

	rc = nvme_tcp_derive_retained_psk(tctrlr->ctrlr.opts.psk, tctrlr->ctrlr.opts.hostnqn, psk_retained,
					  sizeof(psk_retained));
	if (rc < 0) {
		SPDK_ERRLOG("Unable to derive retained PSK!\n");
		return -EINVAL;
	}

	rc = nvme_tcp_derive_tls_psk(psk_retained, rc, tctrlr->psk_identity, tctrlr->psk,
				     sizeof(tctrlr->psk));
	if (rc < 0) {
		SPDK_ERRLOG("Could not generate TLS PSK!\n");
		return rc;
	}

	tctrlr->psk_size = rc;

	return 0;
}
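/*
 * The PSK material flows through three steps above: the host and subsystem
 * NQNs produce the PSK identity, the configured (interchange) PSK plus the
 * host NQN produce the retained PSK, and the retained PSK plus the identity
 * produce the final TLS PSK that nvme_tcp_qpair_connect_sock() hands to the
 * "ssl" sock implementation. The interchange PSK in opts.psk is zeroed out in
 * nvme_tcp_ctrlr_construct() right after this function runs.
 */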
static spdk_nvme_ctrlr_t *
nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
			 const struct spdk_nvme_ctrlr_opts *opts,
			 void *devhandle)
{
	struct nvme_tcp_ctrlr *tctrlr;
	int rc;

	tctrlr = calloc(1, sizeof(*tctrlr));
	if (tctrlr == NULL) {
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	tctrlr->ctrlr.opts = *opts;
	tctrlr->ctrlr.trid = *trid;

	if (opts->psk[0] != '\0') {
		rc = nvme_tcp_generate_tls_credentials(tctrlr);
		spdk_memset_s(&tctrlr->ctrlr.opts.psk, sizeof(tctrlr->ctrlr.opts.psk), 0,
			      sizeof(tctrlr->ctrlr.opts.psk));

		if (rc != 0) {
			free(tctrlr);
			return NULL;
		}
	}

	if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
		SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
			       NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
		tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
	}

	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
	if (rc != 0) {
		free(tctrlr);
		return NULL;
	}

	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, true);
	if (!tctrlr->ctrlr.adminq) {
		SPDK_ERRLOG("failed to create admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/* TCP transport doesn't limit maximum IO transfer size. */
	return UINT32_MAX;
}

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	return NVME_TCP_MAX_SGL_DESCRIPTORS;
}

static int
nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
				int (*iter_fn)(struct nvme_request *req, void *arg),
				void *arg)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_req *tcp_req, *tmp;
	int rc;

	assert(iter_fn != NULL);

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);

		rc = iter_fn(tcp_req->req, arg);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct spdk_nvme_cpl cpl = {};
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);
		if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			continue;
		}

		nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false);
	}
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_poll_group_create(void)
{
	struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));

	if (group == NULL) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	TAILQ_INIT(&group->needs_poll);

	group->sock_group = spdk_sock_group_create(group);
	if (group->sock_group == NULL) {
		free(group);
		SPDK_ERRLOG("Unable to allocate sock group.\n");
		return NULL;
	}

	return &group->group;
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct spdk_sock_group *group = NULL;
	int rc;

	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL);
	if (!rc && group != NULL) {
		return spdk_sock_group_get_ctx(group);
	}

	return NULL;
}

static int
nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
		return -EPROTO;
	}
	return 0;
}

static int
nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	if (tqpair->sock && group->sock_group) {
		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
			return -EPROTO;
		}
	}
	return 0;
}

static int
nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	/* disconnected qpairs won't have a sock to add. */
	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
			return -EPROTO;
		}
	}

	return 0;
}
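/*
 * Illustrative usage (hypothetical application code, not part of this
 * transport): the poll-group hooks in this file sit behind the generic API,
 * roughly:
 *
 *	struct spdk_nvme_poll_group *pg = spdk_nvme_poll_group_create(NULL, NULL);
 *
 *	spdk_nvme_poll_group_add(pg, qpair);	// qpair must still be disconnected
 *	...
 *	spdk_nvme_poll_group_process_completions(pg, 0, disconnected_qpair_cb);
 *
 * The context pointer, the completions hint, and the disconnect callback are
 * application choices; the processing call lands in
 * nvme_tcp_poll_group_process_completions() below.
 */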
static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			   struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *group;

	assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);

	tqpair = nvme_tcp_qpair(qpair);
	group = nvme_tcp_poll_group(tgroup);

	assert(tqpair->shared_stats == true);
	tqpair->stats = &g_dummy_stats;

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	return 0;
}

static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
	struct spdk_nvme_qpair *qpair, *tmp_qpair;
	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
	int num_events;

	group->completions_per_qpair = completions_per_qpair;
	group->num_completions = 0;
	group->stats.polls++;

	num_events = spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
	}

	if (spdk_unlikely(num_events < 0)) {
		return num_events;
	}

	group->stats.idle_polls += !num_events;
	group->stats.socket_completions += num_events;

	return group->num_completions;
}
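/*
 * A tqpair lands on the needs_poll list when an asynchronous write completes
 * after its socket has already been serviced in the current iteration (see
 * nvme_tcp_qpair_sock_cb(), which removes the entry again once the qpair is
 * polled). The sweep above ensures such qpairs still have their completions
 * processed within the same spdk_nvme_poll_group_process_completions() call.
 */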
static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	int rc;
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	rc = spdk_sock_group_close(&group->sock_group);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
		assert(false);
	}

	free(tgroup);

	return 0;
}

static int
nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
			      struct spdk_nvme_transport_poll_group_stat **_stats)
{
	struct nvme_tcp_poll_group *group;
	struct spdk_nvme_transport_poll_group_stat *stats;

	if (tgroup == NULL || _stats == NULL) {
		SPDK_ERRLOG("Invalid stats or group pointer\n");
		return -EINVAL;
	}

	group = nvme_tcp_poll_group(tgroup);

	stats = calloc(1, sizeof(*stats));
	if (!stats) {
		SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
		return -ENOMEM;
	}
	stats->trtype = SPDK_NVME_TRANSPORT_TCP;
	memcpy(&stats->tcp, &group->stats, sizeof(group->stats));

	*_stats = stats;

	return 0;
}

static void
nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
			       struct spdk_nvme_transport_poll_group_stat *stats)
{
	free(stats);
}

const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.ctrlr_construct = nvme_tcp_ctrlr_construct,
	.ctrlr_scan = nvme_fabric_ctrlr_scan,
	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
	.ctrlr_enable = nvme_tcp_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
	.ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
	.ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
	.ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
	.ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,

	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
	.qpair_process_completions = nvme_tcp_qpair_process_completions,
	.qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

	.poll_group_create = nvme_tcp_poll_group_create,
	.qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
	.poll_group_add = nvme_tcp_poll_group_add,
	.poll_group_remove = nvme_tcp_poll_group_remove,
	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
	.poll_group_destroy = nvme_tcp_poll_group_destroy,
	.poll_group_get_stats = nvme_tcp_poll_group_get_stats,
	.poll_group_free_stats = nvme_tcp_poll_group_free_stats,
};
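/*
 * Illustrative usage (hypothetical application code, not part of this
 * transport): the per-poll-group statistics filled in by
 * nvme_tcp_poll_group_get_stats() are normally consumed through the generic
 * API, e.g.:
 *
 *	struct spdk_nvme_poll_group_stat *stat = NULL;
 *
 *	if (spdk_nvme_poll_group_get_stats(pg, &stat) == 0) {
 *		// entries in stat->transport_stat[] with trtype == SPDK_NVME_TRANSPORT_TCP
 *		// carry the struct spdk_nvme_tcp_stat copied above (polls, idle_polls,
 *		// socket_completions, nvme_completions, ...)
 *		spdk_nvme_poll_group_free_stats(pg, stat);
 *	}
 *
 * The exact field names of the aggregate stat structure are taken from the
 * public headers and should be treated as an assumption.
 */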
SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);

SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP)
{
	struct spdk_trace_tpoint_opts opts[] = {
		{
			"NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT,
			OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1,
			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 }
			}
		},
		{
			"NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE,
			OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0,
			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 }
			}
		},
	};

	spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p');
	spdk_trace_register_owner(OWNER_NVME_TCP_QP, 'q');
	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
}
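/*
 * The two tracepoints registered above describe the records this transport
 * emits when requests are submitted and completed; their argument layouts
 * (cid, opc, dw10-dw12 on submit, the completion dword on complete) mirror
 * the NVMe SQE/CQE fields so SPDK's trace tooling can decode them. Whether
 * and how the "nvme_tcp" trace group is enabled at runtime is left to the
 * application and tooling.
 */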