/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
 * NVMe/TCP transport
 */

#include "nvme_internal.h"

#include "spdk/endian.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/trace.h"
#include "spdk/util.h"

#include "spdk_internal/nvme_tcp.h"
#include "spdk_internal/trace_defs.h"

#define NVME_TCP_RW_BUFFER_SIZE 131072
#define NVME_TCP_TIME_OUT_IN_SECONDS 2

#define NVME_TCP_HPDA_DEFAULT 0
#define NVME_TCP_MAX_R2T_DEFAULT 1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096

/*
 * Maximum value of transport_ack_timeout used by TCP controller
 */
#define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31

/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;
};

struct nvme_tcp_poll_group {
	struct spdk_nvme_transport_poll_group group;
	struct spdk_sock_group *sock_group;
	uint32_t completions_per_qpair;
	int64_t num_completions;

	TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
	struct spdk_nvme_tcp_stat stats;
};

/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
	struct spdk_nvme_qpair qpair;
	struct spdk_sock *sock;

	TAILQ_HEAD(, nvme_tcp_req) free_reqs;
	TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs;

	TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
	struct nvme_tcp_pdu *recv_pdu;
	struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */
	struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */
	enum nvme_tcp_pdu_recv_state recv_state;
	struct nvme_tcp_req *tcp_reqs;
	struct spdk_nvme_tcp_stat *stats;

	uint16_t num_entries;
	uint16_t async_complete;

	struct {
		uint16_t host_hdgst_enable: 1;
		uint16_t host_ddgst_enable: 1;
		uint16_t icreq_send_ack: 1;
		uint16_t in_connect_poll: 1;
		uint16_t reserved: 12;
	} flags;

	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
	uint32_t maxh2cdata;

	uint32_t maxr2t;

	/* 0-based value, used to guide the padding */
	uint8_t cpda;

	enum nvme_tcp_qpair_state state;

	TAILQ_ENTRY(nvme_tcp_qpair) link;
	bool needs_poll;

	uint64_t icreq_timeout_tsc;

	bool shared_stats;
};

enum nvme_tcp_req_state {
	NVME_TCP_REQ_FREE,
	NVME_TCP_REQ_ACTIVE,
	NVME_TCP_REQ_ACTIVE_R2T,
};

struct nvme_tcp_req {
	struct nvme_request *req;
	enum nvme_tcp_req_state state;
	uint16_t cid;
	uint16_t ttag;
	uint32_t datao;
	uint32_t expected_datao;
	uint32_t r2tl_remain;
	uint32_t active_r2ts;
	/* Used to hold a value received from a subsequent R2T while we are still
	 * waiting for H2C complete */
	uint16_t ttag_r2t_next;
	bool in_capsule_data;
	/* Used to track whether the req can be safely freed */
	union {
		uint8_t raw;
		struct {
			/* The last send operation completed - kernel released send buffer */
			uint8_t send_ack : 1;
			/* Data transfer completed - target sent resp or last data bit */
			uint8_t data_recv : 1;
			/* tcp_req is waiting for completion of the previous send operation (buffer
reclaim notification 129 * from kernel) to send H2C */ 130 uint8_t h2c_send_waiting_ack : 1; 131 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 132 * Rare case, actual when dealing with target that can send several R2T requests. 133 * SPDK TCP target sends 1 R2T for the whole data buffer */ 134 uint8_t r2t_waiting_h2c_complete : 1; 135 uint8_t reserved : 4; 136 } bits; 137 } ordering; 138 struct nvme_tcp_pdu *pdu; 139 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 140 uint32_t iovcnt; 141 /* Used to hold a value received from subsequent R2T while we are still 142 * waiting for H2C ack */ 143 uint32_t r2tl_remain_next; 144 struct nvme_tcp_qpair *tqpair; 145 TAILQ_ENTRY(nvme_tcp_req) link; 146 struct spdk_nvme_cpl rsp; 147 }; 148 149 static struct spdk_nvme_tcp_stat g_dummy_stats = {}; 150 151 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 152 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 153 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 154 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 155 static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair, 156 struct spdk_nvme_cpl *rsp, bool print_on_error); 157 158 static inline struct nvme_tcp_qpair * 159 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 160 { 161 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 162 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 163 } 164 165 static inline struct nvme_tcp_poll_group * 166 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 167 { 168 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 169 } 170 171 static inline struct nvme_tcp_ctrlr * 172 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 173 { 174 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 175 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 176 } 177 178 static struct nvme_tcp_req * 179 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 180 { 181 struct nvme_tcp_req *tcp_req; 182 183 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 184 if (!tcp_req) { 185 return NULL; 186 } 187 188 assert(tcp_req->state == NVME_TCP_REQ_FREE); 189 tcp_req->state = NVME_TCP_REQ_ACTIVE; 190 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 191 tcp_req->datao = 0; 192 tcp_req->expected_datao = 0; 193 tcp_req->req = NULL; 194 tcp_req->in_capsule_data = false; 195 tcp_req->r2tl_remain = 0; 196 tcp_req->r2tl_remain_next = 0; 197 tcp_req->active_r2ts = 0; 198 tcp_req->iovcnt = 0; 199 tcp_req->ordering.raw = 0; 200 memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu)); 201 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 202 203 return tcp_req; 204 } 205 206 static void 207 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 208 { 209 assert(tcp_req->state != NVME_TCP_REQ_FREE); 210 tcp_req->state = NVME_TCP_REQ_FREE; 211 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 212 } 213 214 static int 215 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 216 { 217 struct addrinfo *res; 218 struct addrinfo hints; 219 int ret; 220 221 memset(&hints, 0, sizeof(hints)); 222 hints.ai_family = family; 223 hints.ai_socktype = SOCK_STREAM; 224 hints.ai_protocol = 0; 225 226 ret = getaddrinfo(addr, service, &hints, &res); 227 if (ret) { 228 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 229 return ret; 
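	/*
	 * The non-zero value returned above is the EAI_* code from getaddrinfo(),
	 * passed through unchanged; callers in this file only test the return
	 * value against zero.  Illustrative use of this helper (the address and
	 * service strings are hypothetical examples, not defaults of this driver):
	 *
	 *	struct sockaddr_storage sa;
	 *
	 *	if (nvme_tcp_parse_addr(&sa, AF_INET, "192.168.0.2", "4420") != 0) {
	 *		// resolution failed
	 *	}
	 */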
230 } 231 232 if (res->ai_addrlen > sizeof(*sa)) { 233 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 234 ret = -EINVAL; 235 } else { 236 memcpy(sa, res->ai_addr, res->ai_addrlen); 237 } 238 239 freeaddrinfo(res); 240 return ret; 241 } 242 243 static void 244 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 245 { 246 free(tqpair->tcp_reqs); 247 tqpair->tcp_reqs = NULL; 248 249 spdk_free(tqpair->send_pdus); 250 tqpair->send_pdus = NULL; 251 } 252 253 static int 254 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 255 { 256 uint16_t i; 257 struct nvme_tcp_req *tcp_req; 258 259 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 260 if (tqpair->tcp_reqs == NULL) { 261 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 262 goto fail; 263 } 264 265 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 266 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 267 0x1000, NULL, 268 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 269 270 if (tqpair->send_pdus == NULL) { 271 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 272 goto fail; 273 } 274 275 TAILQ_INIT(&tqpair->send_queue); 276 TAILQ_INIT(&tqpair->free_reqs); 277 TAILQ_INIT(&tqpair->outstanding_reqs); 278 for (i = 0; i < tqpair->num_entries; i++) { 279 tcp_req = &tqpair->tcp_reqs[i]; 280 tcp_req->cid = i; 281 tcp_req->tqpair = tqpair; 282 tcp_req->pdu = &tqpair->send_pdus[i]; 283 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 284 } 285 286 tqpair->send_pdu = &tqpair->send_pdus[i]; 287 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 288 289 return 0; 290 fail: 291 nvme_tcp_free_reqs(tqpair); 292 return -ENOMEM; 293 } 294 295 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 296 297 static void 298 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 299 { 300 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 301 struct nvme_tcp_pdu *pdu; 302 int rc; 303 struct nvme_tcp_poll_group *group; 304 305 if (tqpair->needs_poll) { 306 group = nvme_tcp_poll_group(qpair->poll_group); 307 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 308 tqpair->needs_poll = false; 309 } 310 311 rc = spdk_sock_close(&tqpair->sock); 312 313 if (tqpair->sock != NULL) { 314 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 315 /* Set it to NULL manually */ 316 tqpair->sock = NULL; 317 } 318 319 /* clear the send_queue */ 320 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 321 pdu = TAILQ_FIRST(&tqpair->send_queue); 322 /* Remove the pdu from the send_queue to prevent the wrong sending out 323 * in the next round connection 324 */ 325 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 326 } 327 328 nvme_tcp_qpair_abort_reqs(qpair, 0); 329 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 330 } 331 332 static int 333 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 334 { 335 struct nvme_tcp_qpair *tqpair; 336 337 assert(qpair != NULL); 338 nvme_tcp_qpair_abort_reqs(qpair, 0); 339 nvme_qpair_deinit(qpair); 340 tqpair = nvme_tcp_qpair(qpair); 341 nvme_tcp_free_reqs(tqpair); 342 if (!tqpair->shared_stats) { 343 free(tqpair->stats); 344 } 345 free(tqpair); 346 347 return 0; 348 } 349 350 static int 351 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 352 { 353 return 0; 354 } 355 356 static int 357 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 358 { 359 struct nvme_tcp_ctrlr 
*tctrlr = nvme_tcp_ctrlr(ctrlr); 360 361 if (ctrlr->adminq) { 362 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 363 } 364 365 nvme_ctrlr_destruct_finish(ctrlr); 366 367 free(tctrlr); 368 369 return 0; 370 } 371 372 static void 373 _pdu_write_done(void *cb_arg, int err) 374 { 375 struct nvme_tcp_pdu *pdu = cb_arg; 376 struct nvme_tcp_qpair *tqpair = pdu->qpair; 377 struct nvme_tcp_poll_group *pgroup; 378 379 /* If there are queued requests, we assume they are queued because they are waiting 380 * for resources to be released. Those resources are almost certainly released in 381 * response to a PDU completing here. However, to attempt to make forward progress 382 * the qpair needs to be polled and we can't rely on another network event to make 383 * that happen. Add it to a list of qpairs to poll regardless of network activity 384 * here. 385 * Besides, when tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or 386 * NVME_TCP_QPAIR_STATE_INITIALIZING, need to add it to needs_poll list too to make 387 * forward progress in case that the resources are released after icreq's or CONNECT's 388 * resp is processed. */ 389 if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) || 390 tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL || 391 tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) { 392 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 393 394 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 395 tqpair->needs_poll = true; 396 } 397 398 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 399 400 if (err != 0) { 401 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 402 return; 403 } 404 405 assert(pdu->cb_fn != NULL); 406 pdu->cb_fn(pdu->cb_arg); 407 } 408 409 static void 410 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 411 { 412 uint32_t mapped_length = 0; 413 struct nvme_tcp_qpair *tqpair = pdu->qpair; 414 415 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu, 416 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 417 &mapped_length); 418 pdu->sock_req.cb_fn = _pdu_write_done; 419 pdu->sock_req.cb_arg = pdu; 420 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 421 tqpair->stats->submitted_requests++; 422 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 423 } 424 425 static void 426 data_crc32_accel_done(void *cb_arg, int status) 427 { 428 struct nvme_tcp_pdu *pdu = cb_arg; 429 430 if (spdk_unlikely(status)) { 431 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 432 _pdu_write_done(pdu, status); 433 return; 434 } 435 436 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 437 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 438 439 _tcp_write_pdu(pdu); 440 } 441 442 static void 443 pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu) 444 { 445 struct nvme_tcp_qpair *tqpair = pdu->qpair; 446 uint32_t crc32c; 447 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 448 449 /* Data Digest */ 450 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 451 tqpair->flags.host_ddgst_enable) { 452 /* Only suport this limited case for the first step */ 453 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 454 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 455 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0))) { 456 
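			/*
			 * Offload path: the CRC32C calculation is handed to the poll
			 * group's accel function table.  data_crc32_accel_done() (above)
			 * finalizes the digest (XOR with SPDK_CRC32C_XOR plus
			 * MAKE_DIGEST_WORD()) and then queues the PDU, so _tcp_write_pdu()
			 * is deferred until the accelerated computation completes.
			 */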
tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 457 &pdu->data_digest_crc32, pdu->data_iov, 458 pdu->data_iovcnt, 0, data_crc32_accel_done, pdu); 459 return; 460 } 461 462 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 463 crc32c = crc32c ^ SPDK_CRC32C_XOR; 464 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 465 } 466 467 _tcp_write_pdu(pdu); 468 } 469 470 static int 471 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 472 struct nvme_tcp_pdu *pdu, 473 nvme_tcp_qpair_xfer_complete_cb cb_fn, 474 void *cb_arg) 475 { 476 int hlen; 477 uint32_t crc32c; 478 479 hlen = pdu->hdr.common.hlen; 480 pdu->cb_fn = cb_fn; 481 pdu->cb_arg = cb_arg; 482 pdu->qpair = tqpair; 483 484 /* Header Digest */ 485 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 486 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 487 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 488 } 489 490 pdu_data_crc32_compute(pdu); 491 492 return 0; 493 } 494 495 /* 496 * Build SGL describing contiguous payload buffer. 497 */ 498 static int 499 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 500 { 501 struct nvme_request *req = tcp_req->req; 502 503 tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset; 504 tcp_req->iov[0].iov_len = req->payload_size; 505 tcp_req->iovcnt = 1; 506 507 SPDK_DEBUGLOG(nvme, "enter\n"); 508 509 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 510 511 return 0; 512 } 513 514 /* 515 * Build SGL describing scattered payload buffer. 516 */ 517 static int 518 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 519 { 520 int rc; 521 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 522 struct nvme_request *req = tcp_req->req; 523 524 SPDK_DEBUGLOG(nvme, "enter\n"); 525 526 assert(req->payload_size != 0); 527 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 528 assert(req->payload.reset_sgl_fn != NULL); 529 assert(req->payload.next_sge_fn != NULL); 530 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 531 532 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 533 remaining_size = req->payload_size; 534 535 do { 536 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 537 &length); 538 if (rc) { 539 return -1; 540 } 541 542 length = spdk_min(length, remaining_size); 543 tcp_req->iov[iovcnt].iov_len = length; 544 remaining_size -= length; 545 iovcnt++; 546 } while (remaining_size > 0 && iovcnt < max_num_sgl); 547 548 549 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. 
*/ 550 if (remaining_size > 0) { 551 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 552 tcp_req, iovcnt, remaining_size); 553 return -1; 554 } 555 556 tcp_req->iovcnt = iovcnt; 557 558 return 0; 559 } 560 561 static int 562 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 563 struct nvme_tcp_req *tcp_req) 564 { 565 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 566 int rc = 0; 567 enum spdk_nvme_data_transfer xfer; 568 uint32_t max_in_capsule_data_size; 569 570 tcp_req->req = req; 571 req->cmd.cid = tcp_req->cid; 572 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 573 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 574 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 575 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 576 577 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 578 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 579 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 580 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 581 } else { 582 rc = -1; 583 } 584 585 if (rc) { 586 return rc; 587 } 588 589 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 590 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 591 592 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 593 } else { 594 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 595 } 596 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 597 max_in_capsule_data_size = ctrlr->ioccsz_bytes; 598 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 599 max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 600 } 601 602 if (req->payload_size <= max_in_capsule_data_size) { 603 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 604 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 605 req->cmd.dptr.sgl1.address = 0; 606 tcp_req->in_capsule_data = true; 607 } 608 } 609 610 return 0; 611 } 612 613 static inline bool 614 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 615 { 616 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 617 return false; 618 } 619 620 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 621 assert(tcp_req->tqpair != NULL); 622 assert(tcp_req->req != NULL); 623 624 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 625 626 if (!tcp_req->tqpair->qpair.in_completion_context) { 627 tcp_req->tqpair->async_complete++; 628 } 629 630 nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true); 631 return true; 632 } 633 634 static void 635 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 636 { 637 struct nvme_tcp_req *tcp_req = cb_arg; 638 639 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 640 tcp_req->tqpair->qpair.id); 641 tcp_req->ordering.bits.send_ack = 1; 642 /* Handle the r2t case */ 643 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 644 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 645 nvme_tcp_send_h2c_data(tcp_req); 646 } else { 647 nvme_tcp_req_complete_safe(tcp_req); 648 } 649 } 650 651 static int 652 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 653 struct nvme_tcp_req *tcp_req) 654 { 655 struct nvme_tcp_pdu *pdu; 656 struct spdk_nvme_tcp_cmd *capsule_cmd; 657 uint32_t plen = 0, alignment; 658 uint8_t pdo; 659 660 SPDK_DEBUGLOG(nvme, "enter\n"); 661 pdu = tcp_req->pdu; 662 663 
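	/*
	 * The code below assembles a CapsuleCmd PDU.  Rough on-wire layout
	 * (header digest, padding and data digest are present only when the
	 * corresponding digests were negotiated and in-capsule data is used):
	 *
	 *	[ common hdr | 64B SQE | HDGST | pad to CPDA | in-capsule data | DDGST ]
	 *
	 * plen accumulates the total PDU length as each piece is added, and pdo
	 * records the offset at which the in-capsule data starts.
	 */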
capsule_cmd = &pdu->hdr.capsule_cmd; 664 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 665 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 666 capsule_cmd->ccsqe = tcp_req->req->cmd; 667 668 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 669 670 if (tqpair->flags.host_hdgst_enable) { 671 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 672 tcp_req); 673 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 674 plen += SPDK_NVME_TCP_DIGEST_LEN; 675 } 676 677 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 678 goto end; 679 } 680 681 pdo = plen; 682 pdu->padding_len = 0; 683 if (tqpair->cpda) { 684 alignment = (tqpair->cpda + 1) << 2; 685 if (alignment > plen) { 686 pdu->padding_len = alignment - plen; 687 pdo = alignment; 688 plen = alignment; 689 } 690 } 691 692 capsule_cmd->common.pdo = pdo; 693 plen += tcp_req->req->payload_size; 694 if (tqpair->flags.host_ddgst_enable) { 695 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 696 plen += SPDK_NVME_TCP_DIGEST_LEN; 697 } 698 699 tcp_req->datao = 0; 700 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 701 0, tcp_req->req->payload_size); 702 end: 703 capsule_cmd->common.plen = plen; 704 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 705 706 } 707 708 static int 709 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 710 struct nvme_request *req) 711 { 712 struct nvme_tcp_qpair *tqpair; 713 struct nvme_tcp_req *tcp_req; 714 715 tqpair = nvme_tcp_qpair(qpair); 716 assert(tqpair != NULL); 717 assert(req != NULL); 718 719 tcp_req = nvme_tcp_req_get(tqpair); 720 if (!tcp_req) { 721 tqpair->stats->queued_requests++; 722 /* Inform the upper layer to try again later. 
*/ 723 return -EAGAIN; 724 } 725 726 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 727 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 728 nvme_tcp_req_put(tqpair, tcp_req); 729 return -1; 730 } 731 732 spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg, 733 (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc, 734 req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12); 735 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 736 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 737 } 738 739 static int 740 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 741 { 742 return 0; 743 } 744 745 static void 746 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 747 struct nvme_tcp_qpair *tqpair, 748 struct spdk_nvme_cpl *rsp, 749 bool print_on_error) 750 { 751 struct spdk_nvme_cpl cpl; 752 spdk_nvme_cmd_cb user_cb; 753 void *user_cb_arg; 754 struct spdk_nvme_qpair *qpair; 755 struct nvme_request *req; 756 bool error, print_error; 757 758 assert(tcp_req->req != NULL); 759 req = tcp_req->req; 760 761 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 762 memcpy(&cpl, rsp, sizeof(cpl)); 763 user_cb = req->cb_fn; 764 user_cb_arg = req->cb_arg; 765 qpair = req->qpair; 766 767 error = spdk_nvme_cpl_is_error(rsp); 768 print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 769 770 if (print_error) { 771 spdk_nvme_qpair_print_command(qpair, &req->cmd); 772 } 773 774 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 775 spdk_nvme_qpair_print_completion(qpair, rsp); 776 } 777 778 spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg, 779 (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw); 780 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 781 nvme_tcp_req_put(tqpair, tcp_req); 782 nvme_free_request(req); 783 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 784 } 785 786 static void 787 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 788 { 789 struct nvme_tcp_req *tcp_req, *tmp; 790 struct spdk_nvme_cpl cpl = {}; 791 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 792 793 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 794 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 795 cpl.status.dnr = dnr; 796 797 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 798 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true); 799 } 800 } 801 802 static inline void 803 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 804 enum nvme_tcp_pdu_recv_state state) 805 { 806 if (tqpair->recv_state == state) { 807 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 808 tqpair, state); 809 return; 810 } 811 tqpair->recv_state = state; 812 } 813 814 static void 815 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 816 { 817 struct nvme_tcp_qpair *tqpair = cb_arg; 818 819 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 820 } 821 822 static void 823 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 824 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 825 { 826 struct nvme_tcp_pdu *rsp_pdu; 827 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 828 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 829 uint8_t copy_len; 830 831 rsp_pdu = tqpair->send_pdu; 832 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 833 h2c_term_req = &rsp_pdu->hdr.term_req; 834 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 835 
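	/*
	 * The H2C TermReq PDU carries the fatal error status (fes), an optional
	 * field offset (fei) for invalid-header/unsupported-parameter errors, and
	 * up to SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE bytes of the offending
	 * PDU header copied in as error data.  Once it has been sent, the
	 * completion callback moves the qpair to NVME_TCP_QPAIR_STATE_EXITING.
	 */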
h2c_term_req->common.hlen = h2c_term_req_hdr_len; 836 837 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 838 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 839 DSET32(&h2c_term_req->fei, error_offset); 840 } 841 842 copy_len = pdu->hdr.common.hlen; 843 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 844 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 845 } 846 847 /* Copy the error info into the buffer */ 848 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 849 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 850 851 /* Contain the header len of the wrong received pdu */ 852 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 853 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 854 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair); 855 } 856 857 static bool 858 nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair) 859 { 860 switch (tqpair->state) { 861 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 862 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 863 case NVME_TCP_QPAIR_STATE_RUNNING: 864 return true; 865 default: 866 return false; 867 } 868 } 869 870 static void 871 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 872 { 873 struct nvme_tcp_pdu *pdu; 874 uint32_t error_offset = 0; 875 enum spdk_nvme_tcp_term_req_fes fes; 876 uint32_t expected_hlen, hd_len = 0; 877 bool plen_error = false; 878 879 pdu = tqpair->recv_pdu; 880 881 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 882 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 883 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 884 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 885 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 886 goto err; 887 } 888 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 889 if (pdu->hdr.common.plen != expected_hlen) { 890 plen_error = true; 891 } 892 } else { 893 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) { 894 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n"); 895 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 896 goto err; 897 } 898 899 switch (pdu->hdr.common.pdu_type) { 900 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 901 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 902 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 903 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 904 } 905 906 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 907 plen_error = true; 908 } 909 break; 910 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 911 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 912 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 913 plen_error = true; 914 } 915 break; 916 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 917 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 918 if ((pdu->hdr.common.plen <= expected_hlen) || 919 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 920 plen_error = true; 921 } 922 break; 923 case SPDK_NVME_TCP_PDU_TYPE_R2T: 924 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 925 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 926 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 927 } 928 929 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 930 plen_error = true; 931 } 932 break; 933 934 default: 935 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 936 
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 937 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 938 goto err; 939 } 940 } 941 942 if (pdu->hdr.common.hlen != expected_hlen) { 943 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 944 expected_hlen, pdu->hdr.common.hlen); 945 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 946 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 947 goto err; 948 949 } else if (plen_error) { 950 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 951 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 952 goto err; 953 } else { 954 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 955 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 956 return; 957 } 958 err: 959 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 960 } 961 962 static struct nvme_tcp_req * 963 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 964 { 965 assert(tqpair != NULL); 966 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 967 return NULL; 968 } 969 970 return &tqpair->tcp_reqs[cid]; 971 } 972 973 static void 974 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 975 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 976 { 977 struct nvme_tcp_req *tcp_req; 978 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 979 uint8_t flags; 980 981 tcp_req = pdu->req; 982 assert(tcp_req != NULL); 983 984 SPDK_DEBUGLOG(nvme, "enter\n"); 985 c2h_data = &pdu->hdr.c2h_data; 986 tcp_req->datao += pdu->data_len; 987 flags = c2h_data->common.flags; 988 989 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) { 990 if (tcp_req->datao == tcp_req->req->payload_size) { 991 tcp_req->rsp.status.p = 0; 992 } else { 993 tcp_req->rsp.status.p = 1; 994 } 995 996 tcp_req->rsp.cid = tcp_req->cid; 997 tcp_req->rsp.sqid = tqpair->qpair.id; 998 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 999 tcp_req->ordering.bits.data_recv = 1; 1000 if (nvme_tcp_req_complete_safe(tcp_req)) { 1001 (*reaped)++; 1002 } 1003 } 1004 } 1005 } 1006 1007 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 1008 "Invalid PDU Header Field", 1009 "PDU Sequence Error", 1010 "Header Digest Error", 1011 "Data Transfer Out of Range", 1012 "Data Transfer Limit Exceeded", 1013 "Unsupported parameter", 1014 }; 1015 1016 static void 1017 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 1018 { 1019 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 1020 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 1021 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1022 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1023 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 1024 DGET32(c2h_term_req->fei)); 1025 } 1026 /* we may also need to dump some other info here */ 1027 } 1028 1029 static void 1030 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 1031 struct nvme_tcp_pdu *pdu) 1032 { 1033 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 1034 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1035 } 1036 1037 static void 1038 _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1039 { 1040 struct nvme_tcp_pdu *pdu; 1041 1042 assert(tqpair != NULL); 1043 pdu = tqpair->recv_pdu; 1044 1045 switch (pdu->hdr.common.pdu_type) { 1046 case 
SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1047 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 1048 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1049 break; 1050 1051 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1052 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 1053 break; 1054 1055 default: 1056 /* The code should not go to here */ 1057 SPDK_ERRLOG("The code should not go to here\n"); 1058 break; 1059 } 1060 } 1061 1062 static void 1063 tcp_data_recv_crc32_done(void *cb_arg, int status) 1064 { 1065 struct nvme_tcp_req *tcp_req = cb_arg; 1066 struct nvme_tcp_pdu *pdu; 1067 struct nvme_tcp_qpair *tqpair; 1068 int rc; 1069 struct nvme_tcp_poll_group *pgroup; 1070 int dummy_reaped = 0; 1071 1072 pdu = tcp_req->pdu; 1073 assert(pdu != NULL); 1074 1075 tqpair = tcp_req->tqpair; 1076 assert(tqpair != NULL); 1077 1078 if (tqpair->qpair.poll_group && !tqpair->needs_poll) { 1079 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1080 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 1081 tqpair->needs_poll = true; 1082 } 1083 1084 if (spdk_unlikely(status)) { 1085 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1086 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1087 goto end; 1088 } 1089 1090 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1091 rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1092 if (rc == 0) { 1093 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1094 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1095 } 1096 1097 end: 1098 nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped); 1099 } 1100 1101 static void 1102 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 1103 uint32_t *reaped) 1104 { 1105 int rc = 0; 1106 struct nvme_tcp_pdu *pdu = tqpair->recv_pdu; 1107 uint32_t crc32c; 1108 struct nvme_tcp_poll_group *tgroup; 1109 struct nvme_tcp_req *tcp_req = pdu->req; 1110 1111 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1112 SPDK_DEBUGLOG(nvme, "enter\n"); 1113 1114 /* The request can be NULL, e.g. 
in case of C2HTermReq */ 1115 if (spdk_likely(tcp_req != NULL)) { 1116 tcp_req->expected_datao += pdu->data_len; 1117 } 1118 1119 /* check data digest if need */ 1120 if (pdu->ddgst_enable) { 1121 /* But if the data digest is enabled, tcp_req cannot be NULL */ 1122 assert(tcp_req != NULL); 1123 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1124 /* Only suport this limitated case that the request has only one c2h pdu */ 1125 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 1126 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 1127 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0) 1128 && tcp_req->req->payload_size == pdu->data_len)) { 1129 tcp_req->pdu->hdr = pdu->hdr; 1130 tcp_req->pdu->req = tcp_req; 1131 memcpy(tcp_req->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest)); 1132 memcpy(tcp_req->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt); 1133 tcp_req->pdu->data_iovcnt = pdu->data_iovcnt; 1134 tcp_req->pdu->data_len = pdu->data_len; 1135 1136 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1137 tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 1138 &tcp_req->pdu->data_digest_crc32, tcp_req->pdu->data_iov, 1139 tcp_req->pdu->data_iovcnt, 0, tcp_data_recv_crc32_done, tcp_req); 1140 return; 1141 } 1142 1143 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1144 crc32c = crc32c ^ SPDK_CRC32C_XOR; 1145 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1146 if (rc == 0) { 1147 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1148 tcp_req = pdu->req; 1149 assert(tcp_req != NULL); 1150 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1151 } 1152 } 1153 1154 _nvme_tcp_pdu_payload_handle(tqpair, reaped); 1155 } 1156 1157 static void 1158 nvme_tcp_send_icreq_complete(void *cb_arg) 1159 { 1160 struct nvme_tcp_qpair *tqpair = cb_arg; 1161 1162 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id); 1163 1164 tqpair->flags.icreq_send_ack = true; 1165 1166 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 1167 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id); 1168 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1169 } 1170 } 1171 1172 static void 1173 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1174 struct nvme_tcp_pdu *pdu) 1175 { 1176 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1177 uint32_t error_offset = 0; 1178 enum spdk_nvme_tcp_term_req_fes fes; 1179 int recv_buf_size; 1180 1181 /* Only PFV 0 is defined currently */ 1182 if (ic_resp->pfv != 0) { 1183 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1184 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1185 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1186 goto end; 1187 } 1188 1189 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1190 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1191 ic_resp->maxh2cdata); 1192 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1193 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1194 goto end; 1195 } 1196 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1197 1198 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1199 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1200 fes = 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1201 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1202 goto end; 1203 } 1204 tqpair->cpda = ic_resp->cpda; 1205 1206 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1207 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false; 1208 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1209 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1210 1211 /* Now that we know whether digests are enabled, properly size the receive buffer to 1212 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1213 * parameter. */ 1214 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1215 1216 if (tqpair->flags.host_hdgst_enable) { 1217 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1218 } 1219 1220 if (tqpair->flags.host_ddgst_enable) { 1221 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1222 } 1223 1224 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1225 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1226 tqpair, 1227 recv_buf_size); 1228 /* Not fatal. */ 1229 } 1230 1231 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1232 1233 if (!tqpair->flags.icreq_send_ack) { 1234 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1235 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1236 return; 1237 } 1238 1239 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1240 return; 1241 end: 1242 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1243 } 1244 1245 static void 1246 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1247 uint32_t *reaped) 1248 { 1249 struct nvme_tcp_req *tcp_req; 1250 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1251 uint32_t cid, error_offset = 0; 1252 enum spdk_nvme_tcp_term_req_fes fes; 1253 1254 SPDK_DEBUGLOG(nvme, "enter\n"); 1255 cid = capsule_resp->rccqe.cid; 1256 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1257 1258 if (!tcp_req) { 1259 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1260 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1261 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1262 goto end; 1263 } 1264 1265 assert(tcp_req->req != NULL); 1266 1267 tcp_req->rsp = capsule_resp->rccqe; 1268 tcp_req->ordering.bits.data_recv = 1; 1269 1270 /* Recv the pdu again */ 1271 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1272 1273 if (nvme_tcp_req_complete_safe(tcp_req)) { 1274 (*reaped)++; 1275 } 1276 1277 return; 1278 1279 end: 1280 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1281 } 1282 1283 static void 1284 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1285 struct nvme_tcp_pdu *pdu) 1286 { 1287 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1288 uint32_t error_offset = 0; 1289 enum spdk_nvme_tcp_term_req_fes fes; 1290 1291 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1292 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1293 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1294 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1295 goto end; 1296 } 1297 1298 /* set the 
data buffer */ 1299 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1300 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1301 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1302 return; 1303 end: 1304 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1305 } 1306 1307 static void 1308 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1309 { 1310 struct nvme_tcp_req *tcp_req; 1311 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1312 uint32_t error_offset = 0; 1313 enum spdk_nvme_tcp_term_req_fes fes; 1314 int flags = c2h_data->common.flags; 1315 1316 SPDK_DEBUGLOG(nvme, "enter\n"); 1317 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1318 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1319 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1320 if (!tcp_req) { 1321 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1322 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1323 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1324 goto end; 1325 1326 } 1327 1328 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n", 1329 tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size); 1330 1331 if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) && 1332 !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) { 1333 SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data); 1334 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1335 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common); 1336 goto end; 1337 } 1338 1339 if (c2h_data->datal > tcp_req->req->payload_size) { 1340 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1341 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1342 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1343 goto end; 1344 } 1345 1346 if (tcp_req->expected_datao != c2h_data->datao) { 1347 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n", 1348 tcp_req, c2h_data->datao, tcp_req->expected_datao); 1349 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1350 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1351 goto end; 1352 } 1353 1354 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1355 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1356 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1357 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1358 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1359 goto end; 1360 1361 } 1362 1363 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1364 c2h_data->datao, c2h_data->datal); 1365 pdu->req = tcp_req; 1366 1367 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1368 return; 1369 1370 end: 1371 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1372 } 1373 1374 static void 1375 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1376 { 1377 struct nvme_tcp_req *tcp_req = cb_arg; 1378 1379 assert(tcp_req != NULL); 1380 1381 tcp_req->ordering.bits.send_ack = 1; 1382 if (tcp_req->r2tl_remain) { 1383 nvme_tcp_send_h2c_data(tcp_req); 1384 } else { 1385 assert(tcp_req->active_r2ts > 0); 1386 
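		/*
		 * All data for the current R2T has been sent and acknowledged, so
		 * retire it.  If the controller already issued another R2T while we
		 * were waiting (r2t_waiting_h2c_complete), continue with that one
		 * below; otherwise try to complete the request.
		 */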
tcp_req->active_r2ts--; 1387 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1388 1389 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1390 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1391 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1392 assert(tcp_req->active_r2ts > 0); 1393 tcp_req->ttag = tcp_req->ttag_r2t_next; 1394 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1395 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1396 nvme_tcp_send_h2c_data(tcp_req); 1397 return; 1398 } 1399 1400 /* Need also call this function to free the resource */ 1401 nvme_tcp_req_complete_safe(tcp_req); 1402 } 1403 } 1404 1405 static void 1406 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1407 { 1408 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1409 struct nvme_tcp_pdu *rsp_pdu; 1410 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1411 uint32_t plen, pdo, alignment; 1412 1413 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1414 tcp_req->ordering.bits.send_ack = 0; 1415 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1416 rsp_pdu = tcp_req->pdu; 1417 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1418 h2c_data = &rsp_pdu->hdr.h2c_data; 1419 1420 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1421 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1422 h2c_data->cccid = tcp_req->cid; 1423 h2c_data->ttag = tcp_req->ttag; 1424 h2c_data->datao = tcp_req->datao; 1425 1426 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1427 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1428 h2c_data->datao, h2c_data->datal); 1429 tcp_req->r2tl_remain -= h2c_data->datal; 1430 1431 if (tqpair->flags.host_hdgst_enable) { 1432 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1433 plen += SPDK_NVME_TCP_DIGEST_LEN; 1434 } 1435 1436 rsp_pdu->padding_len = 0; 1437 pdo = plen; 1438 if (tqpair->cpda) { 1439 alignment = (tqpair->cpda + 1) << 2; 1440 if (alignment > plen) { 1441 rsp_pdu->padding_len = alignment - plen; 1442 pdo = plen = alignment; 1443 } 1444 } 1445 1446 h2c_data->common.pdo = pdo; 1447 plen += h2c_data->datal; 1448 if (tqpair->flags.host_ddgst_enable) { 1449 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1450 plen += SPDK_NVME_TCP_DIGEST_LEN; 1451 } 1452 1453 h2c_data->common.plen = plen; 1454 tcp_req->datao += h2c_data->datal; 1455 if (!tcp_req->r2tl_remain) { 1456 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1457 } 1458 1459 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1460 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1461 1462 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1463 } 1464 1465 static void 1466 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1467 { 1468 struct nvme_tcp_req *tcp_req; 1469 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1470 uint32_t cid, error_offset = 0; 1471 enum spdk_nvme_tcp_term_req_fes fes; 1472 1473 SPDK_DEBUGLOG(nvme, "enter\n"); 1474 cid = r2t->cccid; 1475 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1476 if (!tcp_req) { 1477 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1478 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1479 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1480 goto end; 1481 } 1482 1483 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1484 tqpair); 1485 1486 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) { 1487 
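		/*
		 * First R2T for this command: the controller asks the host to send
		 * r2tl bytes starting at offset r2to using one or more H2C DATA PDUs,
		 * each bounded by the maxh2cdata value negotiated in ICResp (see
		 * nvme_tcp_send_h2c_data()).
		 */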
assert(tcp_req->active_r2ts == 0); 1488 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1489 } 1490 1491 if (tcp_req->datao != r2t->r2to) { 1492 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1493 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to); 1494 goto end; 1495 1496 } 1497 1498 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) { 1499 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n", 1500 tcp_req, r2t->r2to, r2t->r2tl, tqpair->maxh2cdata); 1501 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1502 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl); 1503 goto end; 1504 } 1505 1506 tcp_req->active_r2ts++; 1507 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) { 1508 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) { 1509 /* We receive a subsequent R2T while we are waiting for H2C transfer to complete */ 1510 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n"); 1511 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1); 1512 tcp_req->ttag_r2t_next = r2t->ttag; 1513 tcp_req->r2tl_remain_next = r2t->r2tl; 1514 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1; 1515 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1516 return; 1517 } else { 1518 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1519 SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t, 1520 tqpair); 1521 goto end; 1522 } 1523 } 1524 1525 tcp_req->ttag = r2t->ttag; 1526 tcp_req->r2tl_remain = r2t->r2tl; 1527 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1528 1529 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1530 nvme_tcp_send_h2c_data(tcp_req); 1531 } else { 1532 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1533 } 1534 1535 return; 1536 1537 end: 1538 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1539 1540 } 1541 1542 static void 1543 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1544 { 1545 struct nvme_tcp_pdu *pdu; 1546 int rc; 1547 uint32_t crc32c, error_offset = 0; 1548 enum spdk_nvme_tcp_term_req_fes fes; 1549 1550 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1551 pdu = tqpair->recv_pdu; 1552 1553 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1554 /* check header digest if needed */ 1555 if (pdu->has_hdgst) { 1556 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1557 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1558 if (rc == 0) { 1559 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1560 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1561 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1562 return; 1563 1564 } 1565 } 1566 1567 switch (pdu->hdr.common.pdu_type) { 1568 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1569 nvme_tcp_icresp_handle(tqpair, pdu); 1570 break; 1571 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1572 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1573 break; 1574 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1575 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1576 break; 1577 1578 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1579 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1580 break; 1581 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1582 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1583 break; 1584 1585 default: 1586 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 1587 fes = 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1588 error_offset = 1; 1589 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1590 break; 1591 } 1592 1593 } 1594 1595 static int 1596 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions) 1597 { 1598 int rc = 0; 1599 struct nvme_tcp_pdu *pdu; 1600 uint32_t data_len; 1601 enum nvme_tcp_pdu_recv_state prev_state; 1602 1603 /* The loop here is to allow for several back-to-back state changes. */ 1604 do { 1605 prev_state = tqpair->recv_state; 1606 pdu = tqpair->recv_pdu; 1607 switch (tqpair->recv_state) { 1608 /* If in a new state */ 1609 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1610 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 1611 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1612 break; 1613 /* Wait for the pdu common header */ 1614 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1615 assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)); 1616 rc = nvme_tcp_read_data(tqpair->sock, 1617 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1618 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1619 if (rc < 0) { 1620 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1621 break; 1622 } 1623 pdu->ch_valid_bytes += rc; 1624 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1625 rc = NVME_TCP_PDU_IN_PROGRESS; 1626 goto out; 1627 } 1628 1629 /* The command header of this PDU has now been read from the socket. */ 1630 nvme_tcp_pdu_ch_handle(tqpair); 1631 break; 1632 /* Wait for the pdu specific header */ 1633 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1634 assert(pdu->psh_valid_bytes < pdu->psh_len); 1635 rc = nvme_tcp_read_data(tqpair->sock, 1636 pdu->psh_len - pdu->psh_valid_bytes, 1637 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1638 if (rc < 0) { 1639 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1640 break; 1641 } 1642 1643 pdu->psh_valid_bytes += rc; 1644 if (pdu->psh_valid_bytes < pdu->psh_len) { 1645 rc = NVME_TCP_PDU_IN_PROGRESS; 1646 goto out; 1647 } 1648 1649 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1650 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1651 break; 1652 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1653 /* check whether the data is valid, if not we just return */ 1654 if (!pdu->data_len) { 1655 return NVME_TCP_PDU_IN_PROGRESS; 1656 } 1657 1658 data_len = pdu->data_len; 1659 /* data digest */ 1660 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1661 tqpair->flags.host_ddgst_enable)) { 1662 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1663 pdu->ddgst_enable = true; 1664 } 1665 1666 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1667 if (rc < 0) { 1668 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1669 break; 1670 } 1671 1672 pdu->rw_offset += rc; 1673 if (pdu->rw_offset < data_len) { 1674 rc = NVME_TCP_PDU_IN_PROGRESS; 1675 goto out; 1676 } 1677 1678 assert(pdu->rw_offset == data_len); 1679 /* All of this PDU has now been read from the socket. 
*/ 1680 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1681 break; 1682 case NVME_TCP_PDU_RECV_STATE_ERROR: 1683 memset(pdu, 0, sizeof(struct nvme_tcp_pdu)); 1684 rc = NVME_TCP_PDU_FATAL; 1685 break; 1686 default: 1687 assert(0); 1688 break; 1689 } 1690 } while (prev_state != tqpair->recv_state && *reaped + tqpair->async_complete < max_completions); 1691 1692 out: 1693 *reaped += tqpair->async_complete; 1694 tqpair->async_complete = 0; 1695 1696 return rc; 1697 } 1698 1699 static void 1700 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1701 { 1702 uint64_t t02; 1703 struct nvme_tcp_req *tcp_req, *tmp; 1704 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1705 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1706 struct spdk_nvme_ctrlr_process *active_proc; 1707 1708 /* Don't check timeouts during controller initialization. */ 1709 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1710 return; 1711 } 1712 1713 if (nvme_qpair_is_admin_queue(qpair)) { 1714 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1715 } else { 1716 active_proc = qpair->active_proc; 1717 } 1718 1719 /* Only check timeouts if the current process has a timeout callback. */ 1720 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1721 return; 1722 } 1723 1724 t02 = spdk_get_ticks(); 1725 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1726 assert(tcp_req->req != NULL); 1727 1728 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1729 /* 1730 * The requests are in order, so as soon as one has not timed out, 1731 * stop iterating. 1732 */ 1733 break; 1734 } 1735 } 1736 } 1737 1738 static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, 1739 struct spdk_nvme_qpair *qpair); 1740 1741 static int 1742 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1743 { 1744 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1745 uint32_t reaped; 1746 int rc; 1747 1748 if (qpair->poll_group == NULL) { 1749 rc = spdk_sock_flush(tqpair->sock); 1750 if (rc < 0) { 1751 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1752 nvme_tcp_qpair_check_timeout(qpair); 1753 } 1754 return rc; 1755 } 1756 } 1757 1758 if (max_completions == 0) { 1759 max_completions = tqpair->num_entries; 1760 } else { 1761 max_completions = spdk_min(max_completions, tqpair->num_entries); 1762 } 1763 1764 reaped = 0; 1765 do { 1766 rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions); 1767 if (rc < 0) { 1768 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1769 errno, spdk_strerror(errno)); 1770 goto fail; 1771 } else if (rc == 0) { 1772 /* Partial PDU is read */ 1773 break; 1774 } 1775 1776 } while (reaped < max_completions); 1777 1778 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1779 nvme_tcp_qpair_check_timeout(qpair); 1780 } 1781 1782 if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 1783 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair); 1784 if (rc != 0 && rc != -EAGAIN) { 1785 SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair); 1786 goto fail; 1787 } else if (rc == 0) { 1788 /* Once the connection is completed, we can submit queued requests */ 1789 nvme_qpair_resubmit_requests(qpair, tqpair->num_entries); 1790 } 1791 } 1792 1793 return reaped; 1794 fail: 1795 1796 /* 1797 * Since admin queues take the ctrlr_lock before entering this function, 1798 * we can call nvme_transport_ctrlr_disconnect_qpair. 
static int
nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	uint32_t reaped;
	int rc;

	if (qpair->poll_group == NULL) {
		rc = spdk_sock_flush(tqpair->sock);
		if (rc < 0) {
			if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
				nvme_tcp_qpair_check_timeout(qpair);
			}
			return rc;
		}
	}

	if (max_completions == 0) {
		max_completions = tqpair->num_entries;
	} else {
		max_completions = spdk_min(max_completions, tqpair->num_entries);
	}

	reaped = 0;
	do {
		rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions);
		if (rc < 0) {
			SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n",
				      errno, spdk_strerror(errno));
			goto fail;
		} else if (rc == 0) {
			/* Only a partial PDU was read; wait for more data. */
			break;
		}

	} while (reaped < max_completions);

	if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
		nvme_tcp_qpair_check_timeout(qpair);
	}

	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
		rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
		if (rc != 0 && rc != -EAGAIN) {
			SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair);
			goto fail;
		} else if (rc == 0) {
			/* Once the connection is completed, we can submit queued requests */
			nvme_qpair_resubmit_requests(qpair, tqpair->num_entries);
		}
	}

	return reaped;
fail:

	/*
	 * Since admin queues take the ctrlr_lock before entering this function,
	 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
	 * to call the generic function which will take the lock for us.
	 */
	qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
	} else {
		nvme_ctrlr_disconnect_qpair(qpair);
	}
	return -ENXIO;
}

static void
nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvme_qpair *qpair = ctx;
	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
	int32_t num_completions;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);

	if (pgroup->num_completions >= 0 && num_completions >= 0) {
		pgroup->num_completions += num_completions;
		pgroup->stats.nvme_completions += num_completions;
	} else {
		pgroup->num_completions = -ENXIO;
	}
}
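/*
 * Build and queue the ICReq PDU that starts NVMe/TCP connection setup. The
 * request advertises PDU format version 0, the default HPDA, a 0-based maxr2t,
 * and whether header/data digests were requested in the controller options.
 * The icresp must arrive before icreq_timeout_tsc expires or connect polling
 * fails with -ETIMEDOUT.
 */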
static int
nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
{
	struct spdk_nvme_tcp_ic_req *ic_req;
	struct nvme_tcp_pdu *pdu;

	pdu = tqpair->send_pdu;
	memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
	ic_req = &pdu->hdr.ic_req;

	ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
	ic_req->pfv = 0;
	ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
	ic_req->hpda = NVME_TCP_HPDA_DEFAULT;

	ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
	ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;

	nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);

	tqpair->icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz());
	return 0;
}

static int
nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	int rc;
	struct nvme_tcp_qpair *tqpair;
	int family;
	long int port;
	char *sock_impl_name;
	struct spdk_sock_impl_opts impl_opts;
	size_t impl_opts_size = sizeof(impl_opts);
	struct spdk_sock_opts opts;

	tqpair = nvme_tcp_qpair(qpair);

	switch (ctrlr->trid.adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		family = AF_INET;
		break;
	case SPDK_NVMF_ADRFAM_IPV6:
		family = AF_INET6;
		break;
	default:
		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
		rc = -1;
		return rc;
	}

	SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

	memset(&dst_addr, 0, sizeof(dst_addr));

	SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
	rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
	if (rc != 0) {
		SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n");
		return rc;
	}

	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
		memset(&src_addr, 0, sizeof(src_addr));
		rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
		if (rc != 0) {
			SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n");
			return rc;
		}
	}

	port = spdk_strtol(ctrlr->trid.trsvcid, 10);
	if (port <= 0 || port >= INT_MAX) {
		SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid);
		rc = -1;
		return rc;
	}

	sock_impl_name = ctrlr->opts.psk[0] ? "ssl" : NULL;
	SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name);

	spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);
	impl_opts.enable_ktls = false;
	impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
	/* TODO: Change current PSK HEX string format to TLS PSK Interchange Format */
	impl_opts.psk_key = ctrlr->opts.psk;
	/* TODO: generate identity from hostnqn instead */
	impl_opts.psk_identity = "psk.spdk.io";

	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ctrlr->trid.priority;
	opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
	if (ctrlr->opts.transport_ack_timeout) {
		opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
	}
	if (sock_impl_name) {
		opts.impl_opts = &impl_opts;
		opts.impl_opts_size = sizeof(impl_opts);
	}
	tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts);
	if (!tqpair->sock) {
		SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
			    tqpair, ctrlr->trid.traddr, port);
		rc = -1;
		return rc;
	}

	return 0;
}
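/*
 * Non-blocking connect state machine, called repeatedly while the qpair is in
 * the CONNECTING state: wait for the icresp, then send and poll the NVMe-oF
 * Fabrics CONNECT command, returning -EAGAIN until the qpair reaches the
 * RUNNING state.
 */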
static int
nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	int rc;

	tqpair = nvme_tcp_qpair(qpair);

	/* Prevent this function from being called recursively, as it could lead to issues with
	 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive
	 * call.
	 */
	if (tqpair->flags.in_connect_poll) {
		return -EAGAIN;
	}

	tqpair->flags.in_connect_poll = 1;

	switch (tqpair->state) {
	case NVME_TCP_QPAIR_STATE_INVALID:
	case NVME_TCP_QPAIR_STATE_INITIALIZING:
		if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) {
			SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair);
			rc = -ETIMEDOUT;
			break;
		}
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
		rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
			break;
		}
		tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL;
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
		rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair);
		if (rc == 0) {
			tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		} else if (rc != -EAGAIN) {
			SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n");
		}
		break;
	case NVME_TCP_QPAIR_STATE_RUNNING:
		rc = 0;
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	tqpair->flags.in_connect_poll = 0;
	return rc;
}

static int
nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc = 0;
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *tgroup;

	tqpair = nvme_tcp_qpair(qpair);

	if (!tqpair->sock) {
		rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
		if (rc < 0) {
			return rc;
		}
	}

	if (qpair->poll_group) {
		rc = nvme_poll_group_connect_qpair(qpair);
		if (rc) {
			SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
			return rc;
		}
		tgroup = nvme_tcp_poll_group(qpair->poll_group);
		tqpair->stats = &tgroup->stats;
		tqpair->shared_stats = true;
	} else {
		tqpair->stats = calloc(1, sizeof(*tqpair->stats));
		if (!tqpair->stats) {
			SPDK_ERRLOG("tcp stats memory allocation failed\n");
			return -ENOMEM;
		}
	}

	tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
	/* Explicitly set the state and recv_state of tqpair */
	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
	if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	}
	rc = nvme_tcp_qpair_icreq_send(tqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect the tqpair\n");
		return rc;
	}

	return rc;
}
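/*
 * Allocate a TCP qpair along with its request pool and PDUs, and open the
 * socket connection right away so that spdk_nvme_qpair_get_optimal_poll_group()
 * can use the socket before the qpair is connected.
 */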
static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
			    uint16_t qid, uint32_t qsize,
			    enum spdk_nvme_qprio qprio,
			    uint32_t num_requests, bool async)
{
	struct nvme_tcp_qpair *tqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
		return NULL;
	}

	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
	if (!tqpair) {
		SPDK_ERRLOG("failed to create tqpair\n");
		return NULL;
	}

	/* Set num_entries to one less than the queue size. According to the NVMe
	 * and NVMe-oF specs we cannot submit queue-size requests;
	 * one slot shall always remain empty.
	 */
	tqpair->num_entries = qsize - 1;
	qpair = &tqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
	if (rc != 0) {
		free(tqpair);
		return NULL;
	}

	rc = nvme_tcp_alloc_reqs(tqpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	/* spdk_nvme_qpair_get_optimal_poll_group needs socket information.
	 * So create the socket first when creating a qpair. */
	rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
			       const struct spdk_nvme_io_qpair_opts *opts)
{
	return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
					   opts->io_queue_requests, opts->async_mode);
}

/* We have to use the typedef in the function declaration to appease astyle. */
typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;
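/*
 * Transport constructor: allocate the controller, clamp transport_ack_timeout
 * to NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT, create the admin qpair sized by
 * admin_queue_size, and register the current process with the controller.
 */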
static spdk_nvme_ctrlr_t *
nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
			 const struct spdk_nvme_ctrlr_opts *opts,
			 void *devhandle)
{
	struct nvme_tcp_ctrlr *tctrlr;
	int rc;

	tctrlr = calloc(1, sizeof(*tctrlr));
	if (tctrlr == NULL) {
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	tctrlr->ctrlr.opts = *opts;
	tctrlr->ctrlr.trid = *trid;

	if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
		SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
			       NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
		tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
	}

	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
	if (rc != 0) {
		free(tctrlr);
		return NULL;
	}

	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, true);
	if (!tctrlr->ctrlr.adminq) {
		SPDK_ERRLOG("failed to create admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/* TCP transport doesn't limit maximum IO transfer size. */
	return UINT32_MAX;
}

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * We do not support >1 SGE in the initiator currently,
	 * so we can only return 1 here. Once that support is
	 * added, this should return ctrlr->cdata.nvmf_specific.msdbd
	 * instead.
	 */
	return 1;
}

static int
nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
				int (*iter_fn)(struct nvme_request *req, void *arg),
				void *arg)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_req *tcp_req, *tmp;
	int rc;

	assert(iter_fn != NULL);

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);

		rc = iter_fn(tcp_req->req, arg);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct spdk_nvme_cpl cpl = {};
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);
		if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			continue;
		}

		nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false);
	}
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_poll_group_create(void)
{
	struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));

	if (group == NULL) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	TAILQ_INIT(&group->needs_poll);

	group->sock_group = spdk_sock_group_create(group);
	if (group->sock_group == NULL) {
		free(group);
		SPDK_ERRLOG("Unable to allocate sock group.\n");
		return NULL;
	}

	return &group->group;
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct spdk_sock_group *group = NULL;
	int rc;

	rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL);
	if (!rc && group != NULL) {
		return spdk_sock_group_get_ctx(group);
	}

	return NULL;
}

static int
nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
		return -EPROTO;
	}
	return 0;
}

static int
nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	if (tqpair->sock && group->sock_group) {
		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
			return -EPROTO;
		}
	}
	return 0;
}

static int
nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	/* Disconnected qpairs won't have a sock to add. */
	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
			return -EPROTO;
		}
	}

	return 0;
}

static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			   struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *group;

	assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);

	tqpair = nvme_tcp_qpair(qpair);
	group = nvme_tcp_poll_group(tgroup);

	assert(tqpair->shared_stats == true);
	tqpair->stats = &g_dummy_stats;

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	return 0;
}
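/*
 * Poll every qpair in the group by polling the underlying sock group;
 * completions are accumulated into group->num_completions by
 * nvme_tcp_qpair_sock_cb(). Disconnected qpairs get disconnected_qpair_cb, and
 * qpairs flagged in needs_poll (due to asynchronous write completions) are
 * polled explicitly in case the sock group poll did not reach them.
 */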
static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
	struct spdk_nvme_qpair *qpair, *tmp_qpair;
	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
	int num_events;

	group->completions_per_qpair = completions_per_qpair;
	group->num_completions = 0;
	group->stats.polls++;

	num_events = spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
	}

	if (spdk_unlikely(num_events < 0)) {
		return num_events;
	}

	group->stats.idle_polls += !num_events;
	group->stats.socket_completions += num_events;

	return group->num_completions;
}
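/*
 * A poll group may only be destroyed after every qpair has been removed from
 * it; otherwise -EBUSY is returned. Destroying the group also closes its sock
 * group.
 */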
static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	int rc;
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	rc = spdk_sock_group_close(&group->sock_group);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
		assert(false);
	}

	free(tgroup);

	return 0;
}

static int
nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
			      struct spdk_nvme_transport_poll_group_stat **_stats)
{
	struct nvme_tcp_poll_group *group;
	struct spdk_nvme_transport_poll_group_stat *stats;

	if (tgroup == NULL || _stats == NULL) {
		SPDK_ERRLOG("Invalid stats or group pointer\n");
		return -EINVAL;
	}

	group = nvme_tcp_poll_group(tgroup);

	stats = calloc(1, sizeof(*stats));
	if (!stats) {
		SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
		return -ENOMEM;
	}
	stats->trtype = SPDK_NVME_TRANSPORT_TCP;
	memcpy(&stats->tcp, &group->stats, sizeof(group->stats));

	*_stats = stats;

	return 0;
}

static void
nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
			       struct spdk_nvme_transport_poll_group_stat *stats)
{
	free(stats);
}
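/*
 * Operations table registered with the generic NVMe transport layer. Register
 * accesses and controller scanning go through the shared nvme_fabric_* helpers;
 * everything else maps to the TCP-specific implementations in this file.
 */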
const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.ctrlr_construct = nvme_tcp_ctrlr_construct,
	.ctrlr_scan = nvme_fabric_ctrlr_scan,
	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
	.ctrlr_enable = nvme_tcp_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
	.ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
	.ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
	.ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
	.ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,

	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
	.qpair_process_completions = nvme_tcp_qpair_process_completions,
	.qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

	.poll_group_create = nvme_tcp_poll_group_create,
	.qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
	.poll_group_add = nvme_tcp_poll_group_add,
	.poll_group_remove = nvme_tcp_poll_group_remove,
	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
	.poll_group_destroy = nvme_tcp_poll_group_destroy,
	.poll_group_get_stats = nvme_tcp_poll_group_get_stats,
	.poll_group_free_stats = nvme_tcp_poll_group_free_stats,
};

SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);

SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP)
{
	struct spdk_trace_tpoint_opts opts[] = {
		{
			"NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT,
			OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1,
			{
				{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 }
			}
		},
		{
			"NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE,
			OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0,
			{
				{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 }
			}
		},
	};

	spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p');
	spdk_trace_register_owner(OWNER_NVME_TCP_QP, 'q');
	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
}