1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. All rights reserved. 3 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 /* 8 * NVMe/TCP transport 9 */ 10 11 #include "nvme_internal.h" 12 13 #include "spdk/endian.h" 14 #include "spdk/likely.h" 15 #include "spdk/string.h" 16 #include "spdk/stdinc.h" 17 #include "spdk/crc32.h" 18 #include "spdk/endian.h" 19 #include "spdk/assert.h" 20 #include "spdk/string.h" 21 #include "spdk/thread.h" 22 #include "spdk/trace.h" 23 #include "spdk/util.h" 24 25 #include "spdk_internal/nvme_tcp.h" 26 #include "spdk_internal/trace_defs.h" 27 28 #define NVME_TCP_RW_BUFFER_SIZE 131072 29 #define NVME_TCP_TIME_OUT_IN_SECONDS 2 30 31 #define NVME_TCP_HPDA_DEFAULT 0 32 #define NVME_TCP_MAX_R2T_DEFAULT 1 33 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 34 35 /* 36 * Maximum value of transport_ack_timeout used by TCP controller 37 */ 38 #define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31 39 40 41 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 42 struct nvme_tcp_ctrlr { 43 struct spdk_nvme_ctrlr ctrlr; 44 }; 45 46 struct nvme_tcp_poll_group { 47 struct spdk_nvme_transport_poll_group group; 48 struct spdk_sock_group *sock_group; 49 uint32_t completions_per_qpair; 50 int64_t num_completions; 51 52 TAILQ_HEAD(, nvme_tcp_qpair) needs_poll; 53 struct spdk_nvme_tcp_stat stats; 54 }; 55 56 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 57 struct nvme_tcp_qpair { 58 struct spdk_nvme_qpair qpair; 59 struct spdk_sock *sock; 60 61 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 62 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 63 64 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 65 struct nvme_tcp_pdu *recv_pdu; 66 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 67 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 68 enum nvme_tcp_pdu_recv_state recv_state; 69 struct nvme_tcp_req *tcp_reqs; 70 struct spdk_nvme_tcp_stat *stats; 71 72 uint16_t num_entries; 73 uint16_t async_complete; 74 75 struct { 76 uint16_t host_hdgst_enable: 1; 77 uint16_t host_ddgst_enable: 1; 78 uint16_t icreq_send_ack: 1; 79 uint16_t in_connect_poll: 1; 80 uint16_t reserved: 12; 81 } flags; 82 83 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 84 uint32_t maxh2cdata; 85 86 uint32_t maxr2t; 87 88 /* 0 based value, which is used to guide the padding */ 89 uint8_t cpda; 90 91 enum nvme_tcp_qpair_state state; 92 93 TAILQ_ENTRY(nvme_tcp_qpair) link; 94 bool needs_poll; 95 96 uint64_t icreq_timeout_tsc; 97 98 bool shared_stats; 99 }; 100 101 enum nvme_tcp_req_state { 102 NVME_TCP_REQ_FREE, 103 NVME_TCP_REQ_ACTIVE, 104 NVME_TCP_REQ_ACTIVE_R2T, 105 }; 106 107 struct nvme_tcp_req { 108 struct nvme_request *req; 109 enum nvme_tcp_req_state state; 110 uint16_t cid; 111 uint16_t ttag; 112 uint32_t datao; 113 uint32_t expected_datao; 114 uint32_t r2tl_remain; 115 uint32_t active_r2ts; 116 /* Used to hold a value received from subsequent R2T while we are still 117 * waiting for H2C complete */ 118 uint16_t ttag_r2t_next; 119 bool in_capsule_data; 120 /* It is used to track whether the req can be safely freed */ 121 union { 122 uint8_t raw; 123 struct { 124 /* The last send operation completed - kernel released send buffer */ 125 uint8_t send_ack : 1; 126 /* Data transfer completed - target send resp or last data bit */ 127 uint8_t data_recv : 1; 128 /* tcp_req is waiting for completion of the previous send operation (buffer 
reclaim notification 129 * from kernel) to send H2C */ 130 uint8_t h2c_send_waiting_ack : 1; 131 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 132 * Rare case, actual when dealing with target that can send several R2T requests. 133 * SPDK TCP target sends 1 R2T for the whole data buffer */ 134 uint8_t r2t_waiting_h2c_complete : 1; 135 uint8_t reserved : 4; 136 } bits; 137 } ordering; 138 struct nvme_tcp_pdu *pdu; 139 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 140 uint32_t iovcnt; 141 /* Used to hold a value received from subsequent R2T while we are still 142 * waiting for H2C ack */ 143 uint32_t r2tl_remain_next; 144 struct nvme_tcp_qpair *tqpair; 145 TAILQ_ENTRY(nvme_tcp_req) link; 146 struct spdk_nvme_cpl rsp; 147 }; 148 149 static struct spdk_nvme_tcp_stat g_dummy_stats = {}; 150 151 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 152 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 153 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 154 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 155 static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair, 156 struct spdk_nvme_cpl *rsp, bool print_on_error); 157 158 static inline struct nvme_tcp_qpair * 159 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 160 { 161 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 162 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 163 } 164 165 static inline struct nvme_tcp_poll_group * 166 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 167 { 168 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 169 } 170 171 static inline struct nvme_tcp_ctrlr * 172 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 173 { 174 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 175 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 176 } 177 178 static struct nvme_tcp_req * 179 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 180 { 181 struct nvme_tcp_req *tcp_req; 182 183 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 184 if (!tcp_req) { 185 return NULL; 186 } 187 188 assert(tcp_req->state == NVME_TCP_REQ_FREE); 189 tcp_req->state = NVME_TCP_REQ_ACTIVE; 190 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 191 tcp_req->datao = 0; 192 tcp_req->expected_datao = 0; 193 tcp_req->req = NULL; 194 tcp_req->in_capsule_data = false; 195 tcp_req->r2tl_remain = 0; 196 tcp_req->r2tl_remain_next = 0; 197 tcp_req->active_r2ts = 0; 198 tcp_req->iovcnt = 0; 199 tcp_req->ordering.raw = 0; 200 memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu)); 201 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 202 203 return tcp_req; 204 } 205 206 static void 207 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 208 { 209 assert(tcp_req->state != NVME_TCP_REQ_FREE); 210 tcp_req->state = NVME_TCP_REQ_FREE; 211 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 212 } 213 214 static int 215 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 216 { 217 struct addrinfo *res; 218 struct addrinfo hints; 219 int ret; 220 221 memset(&hints, 0, sizeof(hints)); 222 hints.ai_family = family; 223 hints.ai_socktype = SOCK_STREAM; 224 hints.ai_protocol = 0; 225 226 ret = getaddrinfo(addr, service, &hints, &res); 227 if (ret) { 228 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 229 return ret; 
230 } 231 232 if (res->ai_addrlen > sizeof(*sa)) { 233 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 234 ret = -EINVAL; 235 } else { 236 memcpy(sa, res->ai_addr, res->ai_addrlen); 237 } 238 239 freeaddrinfo(res); 240 return ret; 241 } 242 243 static void 244 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 245 { 246 free(tqpair->tcp_reqs); 247 tqpair->tcp_reqs = NULL; 248 249 spdk_free(tqpair->send_pdus); 250 tqpair->send_pdus = NULL; 251 } 252 253 static int 254 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 255 { 256 uint16_t i; 257 struct nvme_tcp_req *tcp_req; 258 259 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 260 if (tqpair->tcp_reqs == NULL) { 261 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 262 goto fail; 263 } 264 265 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 266 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 267 0x1000, NULL, 268 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 269 270 if (tqpair->send_pdus == NULL) { 271 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 272 goto fail; 273 } 274 275 TAILQ_INIT(&tqpair->send_queue); 276 TAILQ_INIT(&tqpair->free_reqs); 277 TAILQ_INIT(&tqpair->outstanding_reqs); 278 for (i = 0; i < tqpair->num_entries; i++) { 279 tcp_req = &tqpair->tcp_reqs[i]; 280 tcp_req->cid = i; 281 tcp_req->tqpair = tqpair; 282 tcp_req->pdu = &tqpair->send_pdus[i]; 283 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 284 } 285 286 tqpair->send_pdu = &tqpair->send_pdus[i]; 287 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 288 289 return 0; 290 fail: 291 nvme_tcp_free_reqs(tqpair); 292 return -ENOMEM; 293 } 294 295 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 296 297 static void 298 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 299 { 300 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 301 struct nvme_tcp_pdu *pdu; 302 int rc; 303 struct nvme_tcp_poll_group *group; 304 305 if (tqpair->needs_poll) { 306 group = nvme_tcp_poll_group(qpair->poll_group); 307 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 308 tqpair->needs_poll = false; 309 } 310 311 rc = spdk_sock_close(&tqpair->sock); 312 313 if (tqpair->sock != NULL) { 314 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 315 /* Set it to NULL manually */ 316 tqpair->sock = NULL; 317 } 318 319 /* clear the send_queue */ 320 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 321 pdu = TAILQ_FIRST(&tqpair->send_queue); 322 /* Remove the pdu from the send_queue to prevent the wrong sending out 323 * in the next round connection 324 */ 325 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 326 } 327 328 nvme_tcp_qpair_abort_reqs(qpair, 0); 329 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 330 } 331 332 static int 333 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 334 { 335 struct nvme_tcp_qpair *tqpair; 336 337 assert(qpair != NULL); 338 nvme_tcp_qpair_abort_reqs(qpair, 0); 339 nvme_qpair_deinit(qpair); 340 tqpair = nvme_tcp_qpair(qpair); 341 nvme_tcp_free_reqs(tqpair); 342 if (!tqpair->shared_stats) { 343 free(tqpair->stats); 344 } 345 free(tqpair); 346 347 return 0; 348 } 349 350 static int 351 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 352 { 353 return 0; 354 } 355 356 static int 357 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 358 { 359 struct nvme_tcp_ctrlr 
*tctrlr = nvme_tcp_ctrlr(ctrlr); 360 361 if (ctrlr->adminq) { 362 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 363 } 364 365 nvme_ctrlr_destruct_finish(ctrlr); 366 367 free(tctrlr); 368 369 return 0; 370 } 371 372 static void 373 _pdu_write_done(void *cb_arg, int err) 374 { 375 struct nvme_tcp_pdu *pdu = cb_arg; 376 struct nvme_tcp_qpair *tqpair = pdu->qpair; 377 struct nvme_tcp_poll_group *pgroup; 378 379 /* If there are queued requests, we assume they are queued because they are waiting 380 * for resources to be released. Those resources are almost certainly released in 381 * response to a PDU completing here. However, to attempt to make forward progress 382 * the qpair needs to be polled and we can't rely on another network event to make 383 * that happen. Add it to a list of qpairs to poll regardless of network activity 384 * here. 385 * Besides, when tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or 386 * NVME_TCP_QPAIR_STATE_INITIALIZING, need to add it to needs_poll list too to make 387 * forward progress in case that the resources are released after icreq's or CONNECT's 388 * resp is processed. */ 389 if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) || 390 tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL || 391 tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 392 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 393 394 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 395 tqpair->needs_poll = true; 396 } 397 398 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 399 400 if (err != 0) { 401 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 402 return; 403 } 404 405 assert(pdu->cb_fn != NULL); 406 pdu->cb_fn(pdu->cb_arg); 407 } 408 409 static void 410 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 411 { 412 uint32_t mapped_length = 0; 413 struct nvme_tcp_qpair *tqpair = pdu->qpair; 414 415 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu, 416 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 417 &mapped_length); 418 pdu->sock_req.cb_fn = _pdu_write_done; 419 pdu->sock_req.cb_arg = pdu; 420 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 421 tqpair->stats->submitted_requests++; 422 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 423 } 424 425 static void 426 data_crc32_accel_done(void *cb_arg, int status) 427 { 428 struct nvme_tcp_pdu *pdu = cb_arg; 429 430 if (spdk_unlikely(status)) { 431 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 432 _pdu_write_done(pdu, status); 433 return; 434 } 435 436 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 437 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 438 439 _tcp_write_pdu(pdu); 440 } 441 442 static void 443 pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu) 444 { 445 struct nvme_tcp_qpair *tqpair = pdu->qpair; 446 uint32_t crc32c; 447 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 448 449 /* Data Digest */ 450 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 451 tqpair->flags.host_ddgst_enable) { 452 /* Only support this limited case for the first step */ 453 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 454 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 455 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0))) { 456 
tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 457 &pdu->data_digest_crc32, pdu->data_iov, 458 pdu->data_iovcnt, 0, data_crc32_accel_done, pdu); 459 return; 460 } 461 462 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 463 crc32c = crc32c ^ SPDK_CRC32C_XOR; 464 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 465 } 466 467 _tcp_write_pdu(pdu); 468 } 469 470 static int 471 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 472 struct nvme_tcp_pdu *pdu, 473 nvme_tcp_qpair_xfer_complete_cb cb_fn, 474 void *cb_arg) 475 { 476 int hlen; 477 uint32_t crc32c; 478 479 hlen = pdu->hdr.common.hlen; 480 pdu->cb_fn = cb_fn; 481 pdu->cb_arg = cb_arg; 482 pdu->qpair = tqpair; 483 484 /* Header Digest */ 485 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 486 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 487 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 488 } 489 490 pdu_data_crc32_compute(pdu); 491 492 return 0; 493 } 494 495 /* 496 * Build SGL describing contiguous payload buffer. 497 */ 498 static int 499 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 500 { 501 struct nvme_request *req = tcp_req->req; 502 503 tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset; 504 tcp_req->iov[0].iov_len = req->payload_size; 505 tcp_req->iovcnt = 1; 506 507 SPDK_DEBUGLOG(nvme, "enter\n"); 508 509 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 510 511 return 0; 512 } 513 514 /* 515 * Build SGL describing scattered payload buffer. 516 */ 517 static int 518 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 519 { 520 int rc; 521 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 522 struct nvme_request *req = tcp_req->req; 523 524 SPDK_DEBUGLOG(nvme, "enter\n"); 525 526 assert(req->payload_size != 0); 527 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 528 assert(req->payload.reset_sgl_fn != NULL); 529 assert(req->payload.next_sge_fn != NULL); 530 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 531 532 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 533 remaining_size = req->payload_size; 534 535 do { 536 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 537 &length); 538 if (rc) { 539 return -1; 540 } 541 542 length = spdk_min(length, remaining_size); 543 tcp_req->iov[iovcnt].iov_len = length; 544 remaining_size -= length; 545 iovcnt++; 546 } while (remaining_size > 0 && iovcnt < max_num_sgl); 547 548 549 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. 
*/ 550 if (remaining_size > 0) { 551 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 552 tcp_req, iovcnt, remaining_size); 553 return -1; 554 } 555 556 tcp_req->iovcnt = iovcnt; 557 558 return 0; 559 } 560 561 static int 562 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 563 struct nvme_tcp_req *tcp_req) 564 { 565 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 566 int rc = 0; 567 enum spdk_nvme_data_transfer xfer; 568 uint32_t max_in_capsule_data_size; 569 570 tcp_req->req = req; 571 req->cmd.cid = tcp_req->cid; 572 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 573 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 574 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 575 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 576 577 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 578 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 579 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 580 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 581 } else { 582 rc = -1; 583 } 584 585 if (rc) { 586 return rc; 587 } 588 589 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 590 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 591 592 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 593 } else { 594 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 595 } 596 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 597 max_in_capsule_data_size = ctrlr->ioccsz_bytes; 598 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 599 max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 600 } 601 602 if (req->payload_size <= max_in_capsule_data_size) { 603 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 604 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 605 req->cmd.dptr.sgl1.address = 0; 606 tcp_req->in_capsule_data = true; 607 } 608 } 609 610 return 0; 611 } 612 613 static inline bool 614 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 615 { 616 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 617 return false; 618 } 619 620 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 621 assert(tcp_req->tqpair != NULL); 622 assert(tcp_req->req != NULL); 623 624 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 625 626 if (!tcp_req->tqpair->qpair.in_completion_context) { 627 tcp_req->tqpair->async_complete++; 628 } 629 630 nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true); 631 return true; 632 } 633 634 static void 635 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 636 { 637 struct nvme_tcp_req *tcp_req = cb_arg; 638 639 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 640 tcp_req->tqpair->qpair.id); 641 tcp_req->ordering.bits.send_ack = 1; 642 /* Handle the r2t case */ 643 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 644 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 645 nvme_tcp_send_h2c_data(tcp_req); 646 } else { 647 nvme_tcp_req_complete_safe(tcp_req); 648 } 649 } 650 651 static int 652 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 653 struct nvme_tcp_req *tcp_req) 654 { 655 struct nvme_tcp_pdu *pdu; 656 struct spdk_nvme_tcp_cmd *capsule_cmd; 657 uint32_t plen = 0, alignment; 658 uint8_t pdo; 659 660 SPDK_DEBUGLOG(nvme, "enter\n"); 661 pdu = tcp_req->pdu; 662 663 
capsule_cmd = &pdu->hdr.capsule_cmd; 664 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 665 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 666 capsule_cmd->ccsqe = tcp_req->req->cmd; 667 668 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 669 670 if (tqpair->flags.host_hdgst_enable) { 671 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 672 tcp_req); 673 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 674 plen += SPDK_NVME_TCP_DIGEST_LEN; 675 } 676 677 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 678 goto end; 679 } 680 681 pdo = plen; 682 pdu->padding_len = 0; 683 if (tqpair->cpda) { 684 alignment = (tqpair->cpda + 1) << 2; 685 if (alignment > plen) { 686 pdu->padding_len = alignment - plen; 687 pdo = alignment; 688 plen = alignment; 689 } 690 } 691 692 capsule_cmd->common.pdo = pdo; 693 plen += tcp_req->req->payload_size; 694 if (tqpair->flags.host_ddgst_enable) { 695 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 696 plen += SPDK_NVME_TCP_DIGEST_LEN; 697 } 698 699 tcp_req->datao = 0; 700 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 701 0, tcp_req->req->payload_size); 702 end: 703 capsule_cmd->common.plen = plen; 704 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 705 706 } 707 708 static int 709 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 710 struct nvme_request *req) 711 { 712 struct nvme_tcp_qpair *tqpair; 713 struct nvme_tcp_req *tcp_req; 714 715 tqpair = nvme_tcp_qpair(qpair); 716 assert(tqpair != NULL); 717 assert(req != NULL); 718 719 tcp_req = nvme_tcp_req_get(tqpair); 720 if (!tcp_req) { 721 tqpair->stats->queued_requests++; 722 /* Inform the upper layer to try again later. 
*/ 723 return -EAGAIN; 724 } 725 726 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 727 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 728 nvme_tcp_req_put(tqpair, tcp_req); 729 return -1; 730 } 731 732 spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg, 733 (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc, 734 req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12); 735 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 736 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 737 } 738 739 static int 740 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 741 { 742 return 0; 743 } 744 745 static void 746 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 747 struct nvme_tcp_qpair *tqpair, 748 struct spdk_nvme_cpl *rsp, 749 bool print_on_error) 750 { 751 struct spdk_nvme_cpl cpl; 752 spdk_nvme_cmd_cb user_cb; 753 void *user_cb_arg; 754 struct spdk_nvme_qpair *qpair; 755 struct nvme_request *req; 756 bool error, print_error; 757 758 assert(tcp_req->req != NULL); 759 req = tcp_req->req; 760 761 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 762 memcpy(&cpl, rsp, sizeof(cpl)); 763 user_cb = req->cb_fn; 764 user_cb_arg = req->cb_arg; 765 qpair = req->qpair; 766 767 error = spdk_nvme_cpl_is_error(rsp); 768 print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 769 770 if (print_error) { 771 spdk_nvme_qpair_print_command(qpair, &req->cmd); 772 } 773 774 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 775 spdk_nvme_qpair_print_completion(qpair, rsp); 776 } 777 778 spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg, 779 (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw); 780 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 781 nvme_tcp_req_put(tqpair, tcp_req); 782 nvme_free_request(req); 783 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 784 } 785 786 static void 787 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 788 { 789 struct nvme_tcp_req *tcp_req, *tmp; 790 struct spdk_nvme_cpl cpl = {}; 791 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 792 793 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 794 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 795 cpl.status.dnr = dnr; 796 797 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 798 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true); 799 } 800 } 801 802 static inline void 803 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 804 enum nvme_tcp_pdu_recv_state state) 805 { 806 if (tqpair->recv_state == state) { 807 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 808 tqpair, state); 809 return; 810 } 811 tqpair->recv_state = state; 812 } 813 814 static void 815 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 816 { 817 struct nvme_tcp_qpair *tqpair = cb_arg; 818 819 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 820 } 821 822 static void 823 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 824 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 825 { 826 struct nvme_tcp_pdu *rsp_pdu; 827 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 828 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 829 uint8_t copy_len; 830 831 rsp_pdu = tqpair->send_pdu; 832 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 833 h2c_term_req = &rsp_pdu->hdr.term_req; 834 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 835 
h2c_term_req->common.hlen = h2c_term_req_hdr_len; 836 837 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 838 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 839 DSET32(&h2c_term_req->fei, error_offset); 840 } 841 842 copy_len = pdu->hdr.common.hlen; 843 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 844 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 845 } 846 847 /* Copy the error info into the buffer */ 848 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 849 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 850 851 /* Contain the header len of the wrong received pdu */ 852 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 853 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 854 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair); 855 } 856 857 static bool 858 nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair) 859 { 860 switch (tqpair->state) { 861 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 862 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 863 case NVME_TCP_QPAIR_STATE_RUNNING: 864 return true; 865 default: 866 return false; 867 } 868 } 869 870 static void 871 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 872 { 873 struct nvme_tcp_pdu *pdu; 874 uint32_t error_offset = 0; 875 enum spdk_nvme_tcp_term_req_fes fes; 876 uint32_t expected_hlen, hd_len = 0; 877 bool plen_error = false; 878 879 pdu = tqpair->recv_pdu; 880 881 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 882 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 883 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 884 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 885 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 886 goto err; 887 } 888 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 889 if (pdu->hdr.common.plen != expected_hlen) { 890 plen_error = true; 891 } 892 } else { 893 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) { 894 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n"); 895 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 896 goto err; 897 } 898 899 switch (pdu->hdr.common.pdu_type) { 900 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 901 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 902 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 903 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 904 } 905 906 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 907 plen_error = true; 908 } 909 break; 910 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 911 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 912 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 913 plen_error = true; 914 } 915 break; 916 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 917 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 918 if ((pdu->hdr.common.plen <= expected_hlen) || 919 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 920 plen_error = true; 921 } 922 break; 923 case SPDK_NVME_TCP_PDU_TYPE_R2T: 924 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 925 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 926 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 927 } 928 929 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 930 plen_error = true; 931 } 932 break; 933 934 default: 935 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 936 
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 937 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 938 goto err; 939 } 940 } 941 942 if (pdu->hdr.common.hlen != expected_hlen) { 943 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 944 expected_hlen, pdu->hdr.common.hlen); 945 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 946 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 947 goto err; 948 949 } else if (plen_error) { 950 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 951 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 952 goto err; 953 } else { 954 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 955 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 956 return; 957 } 958 err: 959 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 960 } 961 962 static struct nvme_tcp_req * 963 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 964 { 965 assert(tqpair != NULL); 966 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 967 return NULL; 968 } 969 970 return &tqpair->tcp_reqs[cid]; 971 } 972 973 static void 974 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 975 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 976 { 977 struct nvme_tcp_req *tcp_req; 978 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 979 uint8_t flags; 980 981 tcp_req = pdu->req; 982 assert(tcp_req != NULL); 983 984 SPDK_DEBUGLOG(nvme, "enter\n"); 985 c2h_data = &pdu->hdr.c2h_data; 986 tcp_req->datao += pdu->data_len; 987 flags = c2h_data->common.flags; 988 989 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) { 990 if (tcp_req->datao == tcp_req->req->payload_size) { 991 tcp_req->rsp.status.p = 0; 992 } else { 993 tcp_req->rsp.status.p = 1; 994 } 995 996 tcp_req->rsp.cid = tcp_req->cid; 997 tcp_req->rsp.sqid = tqpair->qpair.id; 998 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 999 tcp_req->ordering.bits.data_recv = 1; 1000 if (nvme_tcp_req_complete_safe(tcp_req)) { 1001 (*reaped)++; 1002 } 1003 } 1004 } 1005 } 1006 1007 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 1008 "Invalid PDU Header Field", 1009 "PDU Sequence Error", 1010 "Header Digest Error", 1011 "Data Transfer Out of Range", 1012 "Data Transfer Limit Exceeded", 1013 "Unsupported parameter", 1014 }; 1015 1016 static void 1017 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 1018 { 1019 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 1020 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 1021 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1022 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1023 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 1024 DGET32(c2h_term_req->fei)); 1025 } 1026 /* we may also need to dump some other info here */ 1027 } 1028 1029 static void 1030 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 1031 struct nvme_tcp_pdu *pdu) 1032 { 1033 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 1034 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1035 } 1036 1037 static void 1038 _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1039 { 1040 struct nvme_tcp_pdu *pdu; 1041 1042 assert(tqpair != NULL); 1043 pdu = tqpair->recv_pdu; 1044 1045 switch (pdu->hdr.common.pdu_type) { 1046 case 
SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1047 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 1048 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1049 break; 1050 1051 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1052 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 1053 break; 1054 1055 default: 1056 /* The code should not go to here */ 1057 SPDK_ERRLOG("The code should not go to here\n"); 1058 break; 1059 } 1060 } 1061 1062 static void 1063 tcp_data_recv_crc32_done(void *cb_arg, int status) 1064 { 1065 struct nvme_tcp_req *tcp_req = cb_arg; 1066 struct nvme_tcp_pdu *pdu; 1067 struct nvme_tcp_qpair *tqpair; 1068 int rc; 1069 struct nvme_tcp_poll_group *pgroup; 1070 int dummy_reaped = 0; 1071 1072 pdu = tcp_req->pdu; 1073 assert(pdu != NULL); 1074 1075 tqpair = tcp_req->tqpair; 1076 assert(tqpair != NULL); 1077 1078 if (tqpair->qpair.poll_group && !tqpair->needs_poll) { 1079 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1080 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 1081 tqpair->needs_poll = true; 1082 } 1083 1084 if (spdk_unlikely(status)) { 1085 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1086 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1087 goto end; 1088 } 1089 1090 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1091 rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1092 if (rc == 0) { 1093 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1094 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1095 } 1096 1097 end: 1098 nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped); 1099 } 1100 1101 static void 1102 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 1103 uint32_t *reaped) 1104 { 1105 int rc = 0; 1106 struct nvme_tcp_pdu *pdu = tqpair->recv_pdu; 1107 uint32_t crc32c; 1108 struct nvme_tcp_poll_group *tgroup; 1109 struct nvme_tcp_req *tcp_req = pdu->req; 1110 1111 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1112 SPDK_DEBUGLOG(nvme, "enter\n"); 1113 1114 /* The request can be NULL, e.g. 
in case of C2HTermReq */ 1115 if (spdk_likely(tcp_req != NULL)) { 1116 tcp_req->expected_datao += pdu->data_len; 1117 } 1118 1119 /* check data digest if need */ 1120 if (pdu->ddgst_enable) { 1121 /* But if the data digest is enabled, tcp_req cannot be NULL */ 1122 assert(tcp_req != NULL); 1123 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1124 /* Only support this limitated case that the request has only one c2h pdu */ 1125 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 1126 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 1127 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0) 1128 && tcp_req->req->payload_size == pdu->data_len)) { 1129 tcp_req->pdu->hdr = pdu->hdr; 1130 tcp_req->pdu->req = tcp_req; 1131 memcpy(tcp_req->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest)); 1132 memcpy(tcp_req->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt); 1133 tcp_req->pdu->data_iovcnt = pdu->data_iovcnt; 1134 tcp_req->pdu->data_len = pdu->data_len; 1135 1136 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1137 tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 1138 &tcp_req->pdu->data_digest_crc32, tcp_req->pdu->data_iov, 1139 tcp_req->pdu->data_iovcnt, 0, tcp_data_recv_crc32_done, tcp_req); 1140 return; 1141 } 1142 1143 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1144 crc32c = crc32c ^ SPDK_CRC32C_XOR; 1145 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1146 if (rc == 0) { 1147 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1148 tcp_req = pdu->req; 1149 assert(tcp_req != NULL); 1150 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1151 } 1152 } 1153 1154 _nvme_tcp_pdu_payload_handle(tqpair, reaped); 1155 } 1156 1157 static void 1158 nvme_tcp_send_icreq_complete(void *cb_arg) 1159 { 1160 struct nvme_tcp_qpair *tqpair = cb_arg; 1161 1162 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id); 1163 1164 tqpair->flags.icreq_send_ack = true; 1165 1166 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 1167 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id); 1168 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1169 } 1170 } 1171 1172 static void 1173 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1174 struct nvme_tcp_pdu *pdu) 1175 { 1176 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1177 uint32_t error_offset = 0; 1178 enum spdk_nvme_tcp_term_req_fes fes; 1179 int recv_buf_size; 1180 1181 /* Only PFV 0 is defined currently */ 1182 if (ic_resp->pfv != 0) { 1183 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1184 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1185 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1186 goto end; 1187 } 1188 1189 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1190 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1191 ic_resp->maxh2cdata); 1192 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1193 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1194 goto end; 1195 } 1196 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1197 1198 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1199 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1200 fes = 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1201 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1202 goto end; 1203 } 1204 tqpair->cpda = ic_resp->cpda; 1205 1206 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1207 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false; 1208 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1209 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1210 1211 /* Now that we know whether digests are enabled, properly size the receive buffer to 1212 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1213 * parameter. */ 1214 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1215 1216 if (tqpair->flags.host_hdgst_enable) { 1217 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1218 } 1219 1220 if (tqpair->flags.host_ddgst_enable) { 1221 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1222 } 1223 1224 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1225 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1226 tqpair, 1227 recv_buf_size); 1228 /* Not fatal. */ 1229 } 1230 1231 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1232 1233 if (!tqpair->flags.icreq_send_ack) { 1234 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1235 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1236 return; 1237 } 1238 1239 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1240 return; 1241 end: 1242 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1243 } 1244 1245 static void 1246 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1247 uint32_t *reaped) 1248 { 1249 struct nvme_tcp_req *tcp_req; 1250 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1251 uint32_t cid, error_offset = 0; 1252 enum spdk_nvme_tcp_term_req_fes fes; 1253 1254 SPDK_DEBUGLOG(nvme, "enter\n"); 1255 cid = capsule_resp->rccqe.cid; 1256 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1257 1258 if (!tcp_req) { 1259 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1260 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1261 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1262 goto end; 1263 } 1264 1265 assert(tcp_req->req != NULL); 1266 1267 tcp_req->rsp = capsule_resp->rccqe; 1268 tcp_req->ordering.bits.data_recv = 1; 1269 1270 /* Recv the pdu again */ 1271 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1272 1273 if (nvme_tcp_req_complete_safe(tcp_req)) { 1274 (*reaped)++; 1275 } 1276 1277 return; 1278 1279 end: 1280 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1281 } 1282 1283 static void 1284 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1285 struct nvme_tcp_pdu *pdu) 1286 { 1287 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1288 uint32_t error_offset = 0; 1289 enum spdk_nvme_tcp_term_req_fes fes; 1290 1291 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1292 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1293 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1294 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1295 goto end; 1296 } 1297 1298 /* set the 
data buffer */ 1299 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1300 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1301 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1302 return; 1303 end: 1304 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1305 } 1306 1307 static void 1308 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1309 { 1310 struct nvme_tcp_req *tcp_req; 1311 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1312 uint32_t error_offset = 0; 1313 enum spdk_nvme_tcp_term_req_fes fes; 1314 int flags = c2h_data->common.flags; 1315 1316 SPDK_DEBUGLOG(nvme, "enter\n"); 1317 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1318 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1319 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1320 if (!tcp_req) { 1321 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1322 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1323 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1324 goto end; 1325 1326 } 1327 1328 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n", 1329 tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size); 1330 1331 if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) && 1332 !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) { 1333 SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data); 1334 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1335 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common); 1336 goto end; 1337 } 1338 1339 if (c2h_data->datal > tcp_req->req->payload_size) { 1340 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1341 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1342 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1343 goto end; 1344 } 1345 1346 if (tcp_req->expected_datao != c2h_data->datao) { 1347 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n", 1348 tcp_req, c2h_data->datao, tcp_req->expected_datao); 1349 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1350 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1351 goto end; 1352 } 1353 1354 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1355 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1356 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1357 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1358 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1359 goto end; 1360 1361 } 1362 1363 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1364 c2h_data->datao, c2h_data->datal); 1365 pdu->req = tcp_req; 1366 1367 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1368 return; 1369 1370 end: 1371 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1372 } 1373 1374 static void 1375 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1376 { 1377 struct nvme_tcp_req *tcp_req = cb_arg; 1378 1379 assert(tcp_req != NULL); 1380 1381 tcp_req->ordering.bits.send_ack = 1; 1382 if (tcp_req->r2tl_remain) { 1383 nvme_tcp_send_h2c_data(tcp_req); 1384 } else { 1385 assert(tcp_req->active_r2ts > 0); 1386 
tcp_req->active_r2ts--; 1387 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1388 1389 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1390 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1391 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1392 assert(tcp_req->active_r2ts > 0); 1393 tcp_req->ttag = tcp_req->ttag_r2t_next; 1394 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1395 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1396 nvme_tcp_send_h2c_data(tcp_req); 1397 return; 1398 } 1399 1400 /* Need also call this function to free the resource */ 1401 nvme_tcp_req_complete_safe(tcp_req); 1402 } 1403 } 1404 1405 static void 1406 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1407 { 1408 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1409 struct nvme_tcp_pdu *rsp_pdu; 1410 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1411 uint32_t plen, pdo, alignment; 1412 1413 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1414 tcp_req->ordering.bits.send_ack = 0; 1415 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1416 rsp_pdu = tcp_req->pdu; 1417 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1418 h2c_data = &rsp_pdu->hdr.h2c_data; 1419 1420 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1421 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1422 h2c_data->cccid = tcp_req->cid; 1423 h2c_data->ttag = tcp_req->ttag; 1424 h2c_data->datao = tcp_req->datao; 1425 1426 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1427 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1428 h2c_data->datao, h2c_data->datal); 1429 tcp_req->r2tl_remain -= h2c_data->datal; 1430 1431 if (tqpair->flags.host_hdgst_enable) { 1432 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1433 plen += SPDK_NVME_TCP_DIGEST_LEN; 1434 } 1435 1436 rsp_pdu->padding_len = 0; 1437 pdo = plen; 1438 if (tqpair->cpda) { 1439 alignment = (tqpair->cpda + 1) << 2; 1440 if (alignment > plen) { 1441 rsp_pdu->padding_len = alignment - plen; 1442 pdo = plen = alignment; 1443 } 1444 } 1445 1446 h2c_data->common.pdo = pdo; 1447 plen += h2c_data->datal; 1448 if (tqpair->flags.host_ddgst_enable) { 1449 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1450 plen += SPDK_NVME_TCP_DIGEST_LEN; 1451 } 1452 1453 h2c_data->common.plen = plen; 1454 tcp_req->datao += h2c_data->datal; 1455 if (!tcp_req->r2tl_remain) { 1456 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1457 } 1458 1459 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1460 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1461 1462 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1463 } 1464 1465 static void 1466 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1467 { 1468 struct nvme_tcp_req *tcp_req; 1469 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1470 uint32_t cid, error_offset = 0; 1471 enum spdk_nvme_tcp_term_req_fes fes; 1472 1473 SPDK_DEBUGLOG(nvme, "enter\n"); 1474 cid = r2t->cccid; 1475 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1476 if (!tcp_req) { 1477 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1478 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1479 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1480 goto end; 1481 } 1482 1483 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1484 tqpair); 1485 1486 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) { 1487 
assert(tcp_req->active_r2ts == 0); 1488 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1489 } 1490 1491 if (tcp_req->datao != r2t->r2to) { 1492 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1493 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to); 1494 goto end; 1495 1496 } 1497 1498 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) { 1499 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n", 1500 tcp_req, r2t->r2to, r2t->r2tl, tqpair->maxh2cdata); 1501 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1502 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl); 1503 goto end; 1504 } 1505 1506 tcp_req->active_r2ts++; 1507 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) { 1508 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) { 1509 /* We receive a subsequent R2T while we are waiting for H2C transfer to complete */ 1510 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n"); 1511 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1); 1512 tcp_req->ttag_r2t_next = r2t->ttag; 1513 tcp_req->r2tl_remain_next = r2t->r2tl; 1514 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1; 1515 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1516 return; 1517 } else { 1518 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1519 SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t, 1520 tqpair); 1521 goto end; 1522 } 1523 } 1524 1525 tcp_req->ttag = r2t->ttag; 1526 tcp_req->r2tl_remain = r2t->r2tl; 1527 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1528 1529 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1530 nvme_tcp_send_h2c_data(tcp_req); 1531 } else { 1532 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1533 } 1534 1535 return; 1536 1537 end: 1538 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1539 1540 } 1541 1542 static void 1543 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1544 { 1545 struct nvme_tcp_pdu *pdu; 1546 int rc; 1547 uint32_t crc32c, error_offset = 0; 1548 enum spdk_nvme_tcp_term_req_fes fes; 1549 1550 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1551 pdu = tqpair->recv_pdu; 1552 1553 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1554 /* check header digest if needed */ 1555 if (pdu->has_hdgst) { 1556 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1557 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1558 if (rc == 0) { 1559 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1560 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1561 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1562 return; 1563 1564 } 1565 } 1566 1567 switch (pdu->hdr.common.pdu_type) { 1568 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1569 nvme_tcp_icresp_handle(tqpair, pdu); 1570 break; 1571 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1572 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1573 break; 1574 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1575 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1576 break; 1577 1578 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1579 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1580 break; 1581 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1582 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1583 break; 1584 1585 default: 1586 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 1587 fes = 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1588 error_offset = 1; 1589 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1590 break; 1591 } 1592 1593 } 1594 1595 static int 1596 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions) 1597 { 1598 int rc = 0; 1599 struct nvme_tcp_pdu *pdu; 1600 uint32_t data_len; 1601 enum nvme_tcp_pdu_recv_state prev_state; 1602 1603 *reaped = tqpair->async_complete; 1604 tqpair->async_complete = 0; 1605 1606 /* The loop here is to allow for several back-to-back state changes. */ 1607 do { 1608 if (*reaped >= max_completions) { 1609 break; 1610 } 1611 1612 prev_state = tqpair->recv_state; 1613 pdu = tqpair->recv_pdu; 1614 switch (tqpair->recv_state) { 1615 /* If in a new state */ 1616 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1617 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 1618 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1619 break; 1620 /* Wait for the pdu common header */ 1621 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1622 assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)); 1623 rc = nvme_tcp_read_data(tqpair->sock, 1624 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1625 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1626 if (rc < 0) { 1627 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1628 break; 1629 } 1630 pdu->ch_valid_bytes += rc; 1631 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1632 return NVME_TCP_PDU_IN_PROGRESS; 1633 } 1634 1635 /* The command header of this PDU has now been read from the socket. */ 1636 nvme_tcp_pdu_ch_handle(tqpair); 1637 break; 1638 /* Wait for the pdu specific header */ 1639 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1640 assert(pdu->psh_valid_bytes < pdu->psh_len); 1641 rc = nvme_tcp_read_data(tqpair->sock, 1642 pdu->psh_len - pdu->psh_valid_bytes, 1643 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1644 if (rc < 0) { 1645 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1646 break; 1647 } 1648 1649 pdu->psh_valid_bytes += rc; 1650 if (pdu->psh_valid_bytes < pdu->psh_len) { 1651 return NVME_TCP_PDU_IN_PROGRESS; 1652 } 1653 1654 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1655 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1656 break; 1657 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1658 /* check whether the data is valid, if not we just return */ 1659 if (!pdu->data_len) { 1660 return NVME_TCP_PDU_IN_PROGRESS; 1661 } 1662 1663 data_len = pdu->data_len; 1664 /* data digest */ 1665 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1666 tqpair->flags.host_ddgst_enable)) { 1667 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1668 pdu->ddgst_enable = true; 1669 } 1670 1671 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1672 if (rc < 0) { 1673 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1674 break; 1675 } 1676 1677 pdu->rw_offset += rc; 1678 if (pdu->rw_offset < data_len) { 1679 return NVME_TCP_PDU_IN_PROGRESS; 1680 } 1681 1682 assert(pdu->rw_offset == data_len); 1683 /* All of this PDU has now been read from the socket. 
*/ 1684 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1685 break; 1686 case NVME_TCP_PDU_RECV_STATE_ERROR: 1687 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 1688 return NVME_TCP_PDU_FATAL; 1689 default: 1690 assert(0); 1691 break; 1692 } 1693 } while (prev_state != tqpair->recv_state); 1694 1695 return rc > 0 ? 0 : rc; 1696 } 1697 1698 static void 1699 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1700 { 1701 uint64_t t02; 1702 struct nvme_tcp_req *tcp_req, *tmp; 1703 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1704 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1705 struct spdk_nvme_ctrlr_process *active_proc; 1706 1707 /* Don't check timeouts during controller initialization. */ 1708 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1709 return; 1710 } 1711 1712 if (nvme_qpair_is_admin_queue(qpair)) { 1713 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1714 } else { 1715 active_proc = qpair->active_proc; 1716 } 1717 1718 /* Only check timeouts if the current process has a timeout callback. */ 1719 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1720 return; 1721 } 1722 1723 t02 = spdk_get_ticks(); 1724 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1725 assert(tcp_req->req != NULL); 1726 1727 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1728 /* 1729 * The requests are in order, so as soon as one has not timed out, 1730 * stop iterating. 1731 */ 1732 break; 1733 } 1734 } 1735 } 1736 1737 static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, 1738 struct spdk_nvme_qpair *qpair); 1739 1740 static int 1741 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1742 { 1743 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1744 uint32_t reaped; 1745 int rc; 1746 1747 if (qpair->poll_group == NULL) { 1748 rc = spdk_sock_flush(tqpair->sock); 1749 if (rc < 0) { 1750 SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair, 1751 errno, spdk_strerror(errno)); 1752 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1753 nvme_tcp_qpair_check_timeout(qpair); 1754 } 1755 return rc; 1756 } 1757 } 1758 1759 if (max_completions == 0) { 1760 max_completions = spdk_max(tqpair->num_entries, 1); 1761 } else { 1762 max_completions = spdk_min(max_completions, tqpair->num_entries); 1763 } 1764 1765 reaped = 0; 1766 rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions); 1767 if (rc < 0) { 1768 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1769 errno, spdk_strerror(errno)); 1770 goto fail; 1771 } 1772 1773 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1774 nvme_tcp_qpair_check_timeout(qpair); 1775 } 1776 1777 if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 1778 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair); 1779 if (rc != 0 && rc != -EAGAIN) { 1780 SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair); 1781 goto fail; 1782 } else if (rc == 0) { 1783 /* Once the connection is completed, we can submit queued requests */ 1784 nvme_qpair_resubmit_requests(qpair, tqpair->num_entries); 1785 } 1786 } 1787 1788 return reaped; 1789 fail: 1790 1791 /* 1792 * Since admin queues take the ctrlr_lock before entering this function, 1793 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need 1794 * to call the generic function which will take the lock for us. 
	qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
	} else {
		nvme_ctrlr_disconnect_qpair(qpair);
	}
	return -ENXIO;
}

static void
nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvme_qpair *qpair = ctx;
	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
	int32_t num_completions;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);

	if (pgroup->num_completions >= 0 && num_completions >= 0) {
		pgroup->num_completions += num_completions;
		pgroup->stats.nvme_completions += num_completions;
	} else {
		pgroup->num_completions = -ENXIO;
	}
}

static int
nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
{
	struct spdk_nvme_tcp_ic_req *ic_req;
	struct nvme_tcp_pdu *pdu;

	pdu = tqpair->send_pdu;
	memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
	ic_req = &pdu->hdr.ic_req;

	ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
	ic_req->pfv = 0;
	ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
	ic_req->hpda = NVME_TCP_HPDA_DEFAULT;

	ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
	ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;

	nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);

	tqpair->icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz());
	return 0;
}

static int
nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	int rc;
	struct nvme_tcp_qpair *tqpair;
	int family;
	long int port;
	char *sock_impl_name;
	struct spdk_sock_impl_opts impl_opts;
	size_t impl_opts_size = sizeof(impl_opts);
	struct spdk_sock_opts opts;

	tqpair = nvme_tcp_qpair(qpair);

	switch (ctrlr->trid.adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		family = AF_INET;
		break;
	case SPDK_NVMF_ADRFAM_IPV6:
		family = AF_INET6;
		break;
	default:
		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
		rc = -1;
		return rc;
	}

	SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

	memset(&dst_addr, 0, sizeof(dst_addr));

	SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
	rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
	if (rc != 0) {
		SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n");
		return rc;
	}

	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
		memset(&src_addr, 0, sizeof(src_addr));
		rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
		if (rc != 0) {
			SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n");
			return rc;
		}
	}

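	/* For NVMe/TCP the transport service ID is the TCP port number; validate it
	 * before attempting the connection. */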
	port = spdk_strtol(ctrlr->trid.trsvcid, 10);
	if (port <= 0 || port >= INT_MAX) {
		SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid);
		rc = -1;
		return rc;
	}

	sock_impl_name = ctrlr->opts.psk[0] ? "ssl" : NULL;
	SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name);

	spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);
	impl_opts.enable_ktls = false;
	impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
	/* TODO: Change current PSK HEX string format to TLS PSK Interchange Format */
	impl_opts.psk_key = ctrlr->opts.psk;
	/* TODO: generate identity from hostnqn instead */
	impl_opts.psk_identity = "psk.spdk.io";

	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ctrlr->trid.priority;
	opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
	if (ctrlr->opts.transport_ack_timeout) {
		opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
	}
	if (sock_impl_name) {
		opts.impl_opts = &impl_opts;
		opts.impl_opts_size = sizeof(impl_opts);
	}
	tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts);
	if (!tqpair->sock) {
		SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
			    tqpair, ctrlr->trid.traddr, port);
		rc = -1;
		return rc;
	}

	return 0;
}

static int
nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	int rc;

	tqpair = nvme_tcp_qpair(qpair);

	/* Prevent this function from being called recursively, as it could lead to issues with
	 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive
	 * call.
	 */
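	/* This poller is invoked repeatedly (e.g. from nvme_tcp_qpair_process_completions()
	 * while the qpair is in NVME_QPAIR_CONNECTING) and advances the connect state machine
	 * below until it returns 0 (connected) or a fatal error. */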
	if (tqpair->flags.in_connect_poll) {
		return -EAGAIN;
	}

	tqpair->flags.in_connect_poll = 1;

	switch (tqpair->state) {
	case NVME_TCP_QPAIR_STATE_INVALID:
	case NVME_TCP_QPAIR_STATE_INITIALIZING:
		if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) {
			SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair);
			rc = -ETIMEDOUT;
			break;
		}
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
		rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
			break;
		}
		tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL;
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
		rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair);
		if (rc == 0) {
			tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		} else if (rc != -EAGAIN) {
			SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n");
		}
		break;
	case NVME_TCP_QPAIR_STATE_RUNNING:
		rc = 0;
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	tqpair->flags.in_connect_poll = 0;
	return rc;
}

static int
nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc = 0;
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *tgroup;

	tqpair = nvme_tcp_qpair(qpair);

	if (!tqpair->sock) {
		rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
		if (rc < 0) {
			return rc;
		}
	}

	if (qpair->poll_group) {
		rc = nvme_poll_group_connect_qpair(qpair);
		if (rc) {
			SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
			return rc;
		}
		tgroup = nvme_tcp_poll_group(qpair->poll_group);
		tqpair->stats = &tgroup->stats;
		tqpair->shared_stats = true;
	} else {
		tqpair->stats = calloc(1, sizeof(*tqpair->stats));
		if (!tqpair->stats) {
			SPDK_ERRLOG("tcp stats memory allocation failed\n");
			return -ENOMEM;
		}
	}

	tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
	/* Explicitly set the state and recv_state of tqpair */
	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
	if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	}
	rc = nvme_tcp_qpair_icreq_send(tqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect the tqpair\n");
		return rc;
	}

	return rc;
}
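
/* Note that nvme_tcp_ctrlr_connect_qpair() only kicks off the connection by sending the
 * ICReq; the remainder of the handshake and the Fabrics CONNECT command are completed
 * asynchronously by nvme_tcp_ctrlr_connect_qpair_poll() as the qpair is polled. */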

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
			    uint16_t qid, uint32_t qsize,
			    enum spdk_nvme_qprio qprio,
			    uint32_t num_requests, bool async)
{
	struct nvme_tcp_qpair *tqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
		return NULL;
	}

	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
	if (!tqpair) {
		SPDK_ERRLOG("failed to allocate tqpair\n");
		return NULL;
	}

	/* Set num_entries to one less than the queue size. According to the NVMe
	 * and NVMe-oF specs we cannot submit queue-size requests;
	 * one slot shall always remain empty.
	 */
	tqpair->num_entries = qsize - 1;
	qpair = &tqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
	if (rc != 0) {
		free(tqpair);
		return NULL;
	}

	rc = nvme_tcp_alloc_reqs(tqpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	/* spdk_nvme_qpair_get_optimal_poll_group needs socket information.
	 * So create the socket first when creating a qpair. */
	rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
			       const struct spdk_nvme_io_qpair_opts *opts)
{
	return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
					   opts->io_queue_requests, opts->async_mode);
}

/* We have to use the typedef in the function declaration to appease astyle. */
typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;

static spdk_nvme_ctrlr_t *
nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
			 const struct spdk_nvme_ctrlr_opts *opts,
			 void *devhandle)
{
	struct nvme_tcp_ctrlr *tctrlr;
	int rc;

	tctrlr = calloc(1, sizeof(*tctrlr));
	if (tctrlr == NULL) {
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	tctrlr->ctrlr.opts = *opts;
	tctrlr->ctrlr.trid = *trid;

	if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
		SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
			       NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
		tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
	}

	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
	if (rc != 0) {
		free(tctrlr);
		return NULL;
	}

	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, true);
	if (!tctrlr->ctrlr.adminq) {
		SPDK_ERRLOG("failed to create admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/* TCP transport doesn't limit maximum IO transfer size. */
	return UINT32_MAX;
}
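
/* Even though the transport itself reports no transfer size limit, actual I/O sizes are
 * still bounded elsewhere, e.g. by the controller's MDTS and by the number of iovecs
 * (NVME_TCP_MAX_SGL_DESCRIPTORS) available per request. */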

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * We do not support >1 SGE in the initiator currently,
	 * so we can only return 1 here. Once that support is
	 * added, this should return ctrlr->cdata.nvmf_specific.msdbd
	 * instead.
	 */
	return 1;
}

static int
nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
				int (*iter_fn)(struct nvme_request *req, void *arg),
				void *arg)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_req *tcp_req, *tmp;
	int rc;

	assert(iter_fn != NULL);

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);

		rc = iter_fn(tcp_req->req, arg);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct spdk_nvme_cpl cpl = {};
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);
		if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			continue;
		}

		nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false);
	}
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_poll_group_create(void)
{
	struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));

	if (group == NULL) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	TAILQ_INIT(&group->needs_poll);

	group->sock_group = spdk_sock_group_create(group);
	if (group->sock_group == NULL) {
		free(group);
		SPDK_ERRLOG("Unable to allocate sock group.\n");
		return NULL;
	}

	return &group->group;
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct spdk_sock_group *group = NULL;
	int rc;

	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL);
	if (!rc && group != NULL) {
		return spdk_sock_group_get_ctx(group);
	}

	return NULL;
}

static int
nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
		return -EPROTO;
	}
	return 0;
}

static int
nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	if (tqpair->sock && group->sock_group) {
		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
			return -EPROTO;
		}
	}
	return 0;
}

static int
nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	/* Disconnected qpairs won't have a sock to add. */
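	/* Their sockets are added back via nvme_tcp_poll_group_connect_qpair() once the
	 * qpair is connected again. */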
	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
			return -EPROTO;
		}
	}

	return 0;
}

static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			   struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *group;

	assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);

	tqpair = nvme_tcp_qpair(qpair);
	group = nvme_tcp_poll_group(tgroup);

	assert(tqpair->shared_stats == true);
	tqpair->stats = &g_dummy_stats;

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	return 0;
}

static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
	struct spdk_nvme_qpair *qpair, *tmp_qpair;
	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
	int num_events;

	group->completions_per_qpair = completions_per_qpair;
	group->num_completions = 0;
	group->stats.polls++;

	num_events = spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now.
	 */
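	/* nvme_tcp_qpair_sock_cb() removes each entry from needs_poll before processing it,
	 * so the _SAFE iteration below tolerates that removal. */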
	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
	}

	if (spdk_unlikely(num_events < 0)) {
		return num_events;
	}

	group->stats.idle_polls += !num_events;
	group->stats.socket_completions += num_events;

	return group->num_completions;
}

static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	int rc;
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	rc = spdk_sock_group_close(&group->sock_group);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
		assert(false);
	}

	free(tgroup);

	return 0;
}

static int
nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
			      struct spdk_nvme_transport_poll_group_stat **_stats)
{
	struct nvme_tcp_poll_group *group;
	struct spdk_nvme_transport_poll_group_stat *stats;

	if (tgroup == NULL || _stats == NULL) {
		SPDK_ERRLOG("Invalid stats or group pointer\n");
		return -EINVAL;
	}

	group = nvme_tcp_poll_group(tgroup);

	stats = calloc(1, sizeof(*stats));
	if (!stats) {
		SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
		return -ENOMEM;
	}
	stats->trtype = SPDK_NVME_TRANSPORT_TCP;
	memcpy(&stats->tcp, &group->stats, sizeof(group->stats));

	*_stats = stats;

	return 0;
}

static void
nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
			       struct spdk_nvme_transport_poll_group_stat *stats)
{
	free(stats);
}
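
/* Transport function table registered with the generic NVMe driver below. Controller
 * register access and discovery are delegated to the shared nvme_fabric_* helpers;
 * the remaining entry points are the nvme_tcp_* functions defined in this file. */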

const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.ctrlr_construct = nvme_tcp_ctrlr_construct,
	.ctrlr_scan = nvme_fabric_ctrlr_scan,
	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
	.ctrlr_enable = nvme_tcp_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
	.ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
	.ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
	.ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
	.ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,

	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
	.qpair_process_completions = nvme_tcp_qpair_process_completions,
	.qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

	.poll_group_create = nvme_tcp_poll_group_create,
	.qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
	.poll_group_add = nvme_tcp_poll_group_add,
	.poll_group_remove = nvme_tcp_poll_group_remove,
	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
	.poll_group_destroy = nvme_tcp_poll_group_destroy,
	.poll_group_get_stats = nvme_tcp_poll_group_get_stats,
	.poll_group_free_stats = nvme_tcp_poll_group_free_stats,
};

SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);

SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP)
{
	struct spdk_trace_tpoint_opts opts[] = {
		{
			"NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT,
			OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1,
			{
				{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 }
			}
		},
		{
			"NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE,
			OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0,
			{
				{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 }
			}
		},
	};

	spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p');
	spdk_trace_register_owner(OWNER_NVME_TCP_QP, 'q');
	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
}
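
/*
 * Illustrative usage sketch (not compiled here): a host application selects this transport
 * simply by filling in a TCP transport ID; the ops table above is then looked up through
 * SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops). The address, port and subsystem NQN below
 * are placeholder example values.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_TCP);
 *	trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;
 *	snprintf(trid.traddr, sizeof(trid.traddr), "%s", "192.168.1.10");
 *	snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", "4420");
 *	snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", "nqn.2016-06.io.spdk:cnode1");
 *
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 */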