/* SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
 * NVMe/TCP transport
 */

#include "nvme_internal.h"

#include "spdk/endian.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/trace.h"
#include "spdk/util.h"

#include "spdk_internal/nvme_tcp.h"

#define NVME_TCP_RW_BUFFER_SIZE 131072
#define NVME_TCP_TIME_OUT_IN_SECONDS 2

#define NVME_TCP_HPDA_DEFAULT 0
#define NVME_TCP_MAX_R2T_DEFAULT 1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096

/*
 * Maximum value of transport_ack_timeout used by TCP controller
 */
#define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31

/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;
};

struct nvme_tcp_poll_group {
	struct spdk_nvme_transport_poll_group group;
	struct spdk_sock_group *sock_group;
	uint32_t completions_per_qpair;
	int64_t num_completions;

	TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
	struct spdk_nvme_tcp_stat stats;
};

/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
	struct spdk_nvme_qpair qpair;
	struct spdk_sock *sock;

	TAILQ_HEAD(, nvme_tcp_req) free_reqs;
	TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs;

	TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
	struct nvme_tcp_pdu *recv_pdu;
	struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */
	struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */
	enum nvme_tcp_pdu_recv_state recv_state;
	struct nvme_tcp_req *tcp_reqs;
	struct spdk_nvme_tcp_stat *stats;

	uint16_t num_entries;
	uint16_t async_complete;

	struct {
		uint16_t host_hdgst_enable: 1;
		uint16_t host_ddgst_enable: 1;
		uint16_t icreq_send_ack: 1;
		uint16_t in_connect_poll: 1;
		uint16_t reserved: 12;
	} flags;

	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
	uint32_t maxh2cdata;

	uint32_t maxr2t;

	/* 0 based value, which is used to guide the padding */
	uint8_t cpda;

	enum nvme_tcp_qpair_state state;

	TAILQ_ENTRY(nvme_tcp_qpair) link;
	bool needs_poll;

	uint64_t icreq_timeout_tsc;

	bool shared_stats;
};

enum nvme_tcp_req_state {
	NVME_TCP_REQ_FREE,
	NVME_TCP_REQ_ACTIVE,
	NVME_TCP_REQ_ACTIVE_R2T,
};

struct nvme_tcp_req {
	struct nvme_request *req;
	enum nvme_tcp_req_state state;
	uint16_t cid;
	uint16_t ttag;
	uint32_t datao;
	uint32_t expected_datao;
	uint32_t r2tl_remain;
	uint32_t active_r2ts;
	/* Used to hold a value received from a subsequent R2T while we are still
	 * waiting for H2C complete */
	uint16_t ttag_r2t_next;
	bool in_capsule_data;
	bool pdu_in_use;
	/* Used to track whether the req can be safely freed */
	union {
		uint8_t raw;
		struct {
			/* The last send operation completed - kernel released send buffer */
			uint8_t send_ack : 1;
			/* Data transfer completed - target sent resp or last data bit */
			uint8_t data_recv : 1;
			/* tcp_req is waiting for completion of the previous send operation (buffer
			 * reclaim notification from kernel) to send H2C */
			uint8_t h2c_send_waiting_ack : 1;
			/* tcp_req received subsequent r2t while it is still waiting for send_ack.
			 * Rare case, actual when dealing with target that can send several R2T requests.
			 * SPDK TCP target sends 1 R2T for the whole data buffer */
			uint8_t r2t_waiting_h2c_complete : 1;
			uint8_t reserved : 4;
		} bits;
	} ordering;
	struct nvme_tcp_pdu *pdu;
	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t iovcnt;
	/* Used to hold a value received from a subsequent R2T while we are still
	 * waiting for H2C ack */
	uint32_t r2tl_remain_next;
	struct nvme_tcp_qpair *tqpair;
	TAILQ_ENTRY(nvme_tcp_req) link;
	struct spdk_nvme_cpl rsp;
};

static struct spdk_nvme_tcp_stat g_dummy_stats = {};

static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu);

static inline struct nvme_tcp_qpair *
nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
{
	assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
}

static inline struct nvme_tcp_poll_group *
nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
{
	return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
}

static inline struct nvme_tcp_ctrlr *
nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
}

static struct nvme_tcp_req *
nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_req *tcp_req;

	tcp_req = TAILQ_FIRST(&tqpair->free_reqs);
	if (!tcp_req) {
		return NULL;
	}

	assert(tcp_req->state == NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_ACTIVE;
	TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
	tcp_req->datao = 0;
	tcp_req->expected_datao = 0;
	tcp_req->req = NULL;
	tcp_req->in_capsule_data = false;
	tcp_req->pdu_in_use = false;
	tcp_req->r2tl_remain = 0;
	tcp_req->r2tl_remain_next = 0;
	tcp_req->active_r2ts = 0;
	tcp_req->iovcnt = 0;
	tcp_req->ordering.raw = 0;
	memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu));
	memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl));
	TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);

	return tcp_req;
}

static void
nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	assert(tcp_req->state != NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_FREE;
	TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link);
}

static int
nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	ret = getaddrinfo(addr, service, &hints, &res);
	if (ret) {
		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
		return ret;
	}

	if (res->ai_addrlen > sizeof(*sa)) {
		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
		ret = -EINVAL;
	} else {
		memcpy(sa, res->ai_addr, res->ai_addrlen);
	}

	freeaddrinfo(res);
	return ret;
}

static void
nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair)
{
	free(tqpair->tcp_reqs);
	tqpair->tcp_reqs = NULL;

	spdk_free(tqpair->send_pdus);
	tqpair->send_pdus = NULL;
}

static int
nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair)
{
	uint16_t i;
	struct nvme_tcp_req *tcp_req;

	tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req));
	if (tqpair->tcp_reqs == NULL) {
		SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair);
		goto fail;
	}

	/* Allocate 2 additional PDUs for the send_pdu and recv_pdu owned by the tqpair */
	tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu),
					 0x1000, NULL,
					 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);

	if (tqpair->send_pdus == NULL) {
		SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair);
		goto fail;
	}

	TAILQ_INIT(&tqpair->send_queue);
	TAILQ_INIT(&tqpair->free_reqs);
	TAILQ_INIT(&tqpair->outstanding_reqs);
	for (i = 0; i < tqpair->num_entries; i++) {
		tcp_req = &tqpair->tcp_reqs[i];
		tcp_req->cid = i;
		tcp_req->tqpair = tqpair;
		tcp_req->pdu = &tqpair->send_pdus[i];
		TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
	}

	tqpair->send_pdu = &tqpair->send_pdus[i];
	tqpair->recv_pdu = &tqpair->send_pdus[i + 1];

	return 0;
fail:
	nvme_tcp_free_reqs(tqpair);
	return -ENOMEM;
}

static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);

static void
nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_pdu *pdu;
	int rc;
	struct nvme_tcp_poll_group *group;

	if (tqpair->needs_poll) {
		group = nvme_tcp_poll_group(qpair->poll_group);
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	rc = spdk_sock_close(&tqpair->sock);

	if (tqpair->sock != NULL) {
		SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc);
		/* Set it to NULL manually */
		tqpair->sock = NULL;
	}

	/* Clear the send_queue */
	while (!TAILQ_EMPTY(&tqpair->send_queue)) {
		pdu = TAILQ_FIRST(&tqpair->send_queue);
		/* Remove the pdu from the send_queue so that it is not sent out
		 * erroneously on the next connection attempt.
		 */
		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
	}

	nvme_tcp_qpair_abort_reqs(qpair, 0);
	nvme_transport_ctrlr_disconnect_qpair_done(qpair);
}

static int
nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;

	assert(qpair != NULL);
	nvme_tcp_qpair_abort_reqs(qpair, 0);
	nvme_qpair_deinit(qpair);
	tqpair = nvme_tcp_qpair(qpair);
	nvme_tcp_free_reqs(tqpair);
	if (!tqpair->shared_stats) {
		free(tqpair->stats);
	}
	free(tqpair);

	return 0;
}

static int
nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	return 0;
}

static int
nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);

	if (ctrlr->adminq) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	free(tctrlr);

	return 0;
}

static void
_pdu_write_done(void *cb_arg, int err)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct nvme_tcp_qpair *tqpair = pdu->qpair;
	struct nvme_tcp_poll_group *pgroup;

	/* If there are queued requests, we assume they are queued because they are waiting
	 * for resources to be released. Those resources are almost certainly released in
	 * response to a PDU completing here. However, to attempt to make forward progress
	 * the qpair needs to be polled and we can't rely on another network event to make
	 * that happen. Add it to a list of qpairs to poll regardless of network activity
	 * here.
	 * Besides, when the tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or
	 * NVME_TCP_QPAIR_STATE_INITIALIZING, we need to add it to the needs_poll list too,
	 * to make forward progress in case the resources are released after the icreq's or
	 * CONNECT's resp is processed. */
	if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) ||
			tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL ||
			tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
		pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);

		TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = true;
	}

	TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);

	if (err != 0) {
		nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}

static void
_tcp_write_pdu(struct nvme_tcp_pdu *pdu)
{
	uint32_t mapped_length = 0;
	struct nvme_tcp_qpair *tqpair = pdu->qpair;

	pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu,
			       (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable,
			       &mapped_length);
	pdu->sock_req.cb_fn = _pdu_write_done;
	pdu->sock_req.cb_arg = pdu;
	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
	tqpair->stats->submitted_requests++;
	spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
}

static void
data_crc32_accel_done(void *cb_arg, int status)
{
	struct nvme_tcp_pdu *pdu = cb_arg;

	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Failed to compute the data digest for pdu=%p\n", pdu);
		_pdu_write_done(pdu, status);
		return;
	}

	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);

	_tcp_write_pdu(pdu);
}

static void
pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_qpair *tqpair = pdu->qpair;
	uint32_t crc32c;
	struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] &&
	    tqpair->flags.host_ddgst_enable) {
		/* Only support this limited case for the first step */
		if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) &&
		    (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) &&
		    spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0))) {
			tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx,
					&pdu->data_digest_crc32, pdu->data_iov,
					pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
			return;
		}

		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		crc32c = crc32c ^ SPDK_CRC32C_XOR;
		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
	}

	_tcp_write_pdu(pdu);
}
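
/*
 * Generic PDU send path: compute the header digest (if negotiated) in place,
 * then compute the data digest (possibly offloaded to the poll group's accel
 * engine via pdu_data_crc32_compute()), and finally queue the PDU on the
 * qpair's send_queue for spdk_sock_writev_async(). The cb_fn is invoked from
 * _pdu_write_done() once the socket layer releases the send buffers.
 */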
static int
nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;
	pdu->qpair = tqpair;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	pdu_data_crc32_compute(pdu);

	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	struct nvme_request *req = tcp_req->req;

	tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset;
	tcp_req->iov[0].iov_len = req->payload_size;
	tcp_req->iovcnt = 1;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	int rc;
	uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
	struct nvme_request *req = tcp_req->req;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
	remaining_size = req->payload_size;

	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
					      &length);
		if (rc) {
			return -1;
		}

		length = spdk_min(length, remaining_size);
		tcp_req->iov[iovcnt].iov_len = length;
		remaining_size -= length;
		iovcnt++;
	} while (remaining_size > 0 && iovcnt < max_num_sgl);

	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
	if (remaining_size > 0) {
		SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n",
			    tcp_req, iovcnt, remaining_size);
		return -1;
	}

	tcp_req->iovcnt = iovcnt;

	return 0;
}

static int
nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
		  struct nvme_tcp_req *tcp_req)
{
	struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr;
	int rc = 0;
	enum spdk_nvme_data_transfer xfer;
	uint32_t max_in_capsule_data_size;

	tcp_req->req = req;
	req->cmd.cid = tcp_req->cid;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT;
	req->cmd.dptr.sgl1.unkeyed.length = req->payload_size;

	if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
		rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
	} else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
		rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
	} else {
		rc = -1;
	}

	if (rc) {
		return rc;
	}

	if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) {
		struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;

		xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
	} else {
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
	}
	if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		max_in_capsule_data_size = ctrlr->ioccsz_bytes;
		if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) {
			max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
		}

		if (req->payload_size <= max_in_capsule_data_size) {
			req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
			req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
			req->cmd.dptr.sgl1.address = 0;
			tcp_req->in_capsule_data = true;
		}
	}

	return 0;
}

static inline bool
nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req)
{
	struct spdk_nvme_cpl cpl;
	spdk_nvme_cmd_cb user_cb;
	void *user_cb_arg;
	struct spdk_nvme_qpair *qpair;
	struct nvme_request *req;

	if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) {
		return false;
	}

	assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
	assert(tcp_req->tqpair != NULL);
	assert(tcp_req->req != NULL);

	SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair);

	if (!tcp_req->tqpair->qpair.in_completion_context) {
		tcp_req->tqpair->async_complete++;
	}

	/* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */
	memcpy(&cpl, &tcp_req->rsp, sizeof(cpl));
	user_cb = tcp_req->req->cb_fn;
	user_cb_arg = tcp_req->req->cb_arg;
	qpair = tcp_req->req->qpair;
	req = tcp_req->req;

	TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
	nvme_tcp_req_put(tcp_req->tqpair, tcp_req);
	nvme_free_request(tcp_req->req);
	nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl);

	return true;
}

static void
nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
{
	struct nvme_tcp_req *tcp_req = cb_arg;

	SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid,
		      tcp_req->tqpair->qpair.id);
	tcp_req->ordering.bits.send_ack = 1;
	/* Handle the r2t case */
	if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) {
		SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req);
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		nvme_tcp_req_complete_safe(tcp_req);
	}
}
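
/*
 * Build and send the capsule command PDU for a request: fill in the common
 * header, optionally set the HDGSTF/DDGSTF flags, apply CPDA-based padding,
 * and attach the payload iovecs when the data is sent in-capsule. The PDU is
 * completed through nvme_tcp_qpair_cmd_send_complete() above.
 */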
static int
nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair,
				struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_pdu *pdu;
	struct spdk_nvme_tcp_cmd *capsule_cmd;
	uint32_t plen = 0, alignment;
	uint8_t pdo;

	SPDK_DEBUGLOG(nvme, "enter\n");
	pdu = tcp_req->pdu;

	capsule_cmd = &pdu->hdr.capsule_cmd;
	capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD;
	plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd);
	capsule_cmd->ccsqe = tcp_req->req->cmd;

	SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair);

	if (tqpair->flags.host_hdgst_enable) {
		SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n",
			      tcp_req);
		capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) {
		goto end;
	}

	pdo = plen;
	pdu->padding_len = 0;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (alignment > plen) {
			pdu->padding_len = alignment - plen;
			pdo = alignment;
			plen = alignment;
		}
	}

	capsule_cmd->common.pdo = pdo;
	plen += tcp_req->req->payload_size;
	if (tqpair->flags.host_ddgst_enable) {
		capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	tcp_req->datao = 0;
	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
				  0, tcp_req->req->payload_size);
end:
	capsule_cmd->common.plen = plen;
	return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req);
}

static int
nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair,
			      struct nvme_request *req)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_req *tcp_req;

	tqpair = nvme_tcp_qpair(qpair);
	assert(tqpair != NULL);
	assert(req != NULL);

	tcp_req = nvme_tcp_req_get(tqpair);
	if (!tcp_req) {
		tqpair->stats->queued_requests++;
		/* Inform the upper layer to try again later. */
		return -EAGAIN;
	}

	if (nvme_tcp_req_init(tqpair, req, tcp_req)) {
		SPDK_ERRLOG("nvme_tcp_req_init() failed\n");
		TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
		nvme_tcp_req_put(tqpair, tcp_req);
		return -1;
	}

	return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req);
}

static int
nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	return 0;
}

static void
nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req,
		      struct spdk_nvme_cpl *rsp)
{
	struct nvme_request *req;

	assert(tcp_req->req != NULL);
	req = tcp_req->req;

	TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
	nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp);
	nvme_free_request(req);
}

static void
nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct spdk_nvme_cpl cpl;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	cpl.status.dnr = dnr;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		nvme_tcp_req_complete(tcp_req, &cpl);
		nvme_tcp_req_put(tqpair, tcp_req);
	}
}

static void
nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair,
			      enum nvme_tcp_pdu_recv_state state)
{
	if (tqpair->recv_state == state) {
		SPDK_ERRLOG("The recv state of tqpair=%p is already set to state (%d)\n",
			    tqpair, state);
		return;
	}

	tqpair->recv_state = state;
	switch (state) {
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
	case NVME_TCP_PDU_RECV_STATE_ERROR:
		memset(tqpair->recv_pdu, 0, sizeof(struct nvme_tcp_pdu));
		break;
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
	default:
		break;
	}
}

static void
nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg)
{
	struct nvme_tcp_qpair *tqpair = cb_arg;

	tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
}

static void
nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
				 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
{
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req;
	uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req);
	uint8_t copy_len;

	rsp_pdu = tqpair->send_pdu;
	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
	h2c_term_req = &rsp_pdu->hdr.term_req;
	h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
	h2c_term_req->common.hlen = h2c_term_req_hdr_len;

	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		DSET32(&h2c_term_req->fei, error_offset);
	}

	copy_len = pdu->hdr.common.hlen;
	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
	}

	/* Copy the error info into the buffer */
	memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len);
	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len);

	/* The plen also covers the copied header of the wrongly received pdu */
	h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair);
}

static bool
nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair)
{
	switch (tqpair->state) {
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
	case NVME_TCP_QPAIR_STATE_RUNNING:
		return true;
	default:
		return false;
	}
}

static void
nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_pdu *pdu;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	uint32_t expected_hlen, hd_len = 0;
	bool plen_error = false;

	pdu = tqpair->recv_pdu;

	SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type);
	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) {
		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
			SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
			goto err;
		}
		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp);
		if (pdu->hdr.common.plen != expected_hlen) {
			plen_error = true;
		}
	} else {
		if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) {
			SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n");
			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
			goto err;
		}

		switch (pdu->hdr.common.pdu_type) {
		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
			expected_hlen = sizeof(struct spdk_nvme_tcp_rsp);
			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
			}

			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
			expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
			if (pdu->hdr.common.plen < pdu->hdr.common.pdo) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
			if ((pdu->hdr.common.plen <= expected_hlen) ||
			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_R2T:
			expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr);
			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
			}

			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
				plen_error = true;
			}
			break;

		default:
			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
			goto err;
		}
	}

	if (pdu->hdr.common.hlen != expected_hlen) {
		SPDK_ERRLOG("Expected PDU header length %u, got %u\n",
			    expected_hlen, pdu->hdr.common.hlen);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
		goto err;

	} else if (plen_error) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
		goto err;
	} else {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
		nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable);
		return;
	}
err:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static struct nvme_tcp_req *
get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid)
{
	assert(tqpair != NULL);
	if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) {
		return NULL;
	}

	return &tqpair->tcp_reqs[cid];
}

static void
nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu, uint32_t *reaped)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
	uint8_t flags;

	tcp_req = pdu->req;
	assert(tcp_req != NULL);

	SPDK_DEBUGLOG(nvme, "enter\n");
	c2h_data = &pdu->hdr.c2h_data;
	tcp_req->datao += pdu->data_len;
	flags = c2h_data->common.flags;

	if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) {
		if (tcp_req->datao == tcp_req->req->payload_size) {
			tcp_req->rsp.status.p = 0;
		} else {
			tcp_req->rsp.status.p = 1;
		}

		tcp_req->rsp.cid = tcp_req->cid;
		tcp_req->rsp.sqid = tqpair->qpair.id;
		if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
			tcp_req->ordering.bits.data_recv = 1;
			if (nvme_tcp_req_complete_safe(tcp_req)) {
				(*reaped)++;
			}
		}
	}
}

static const char *spdk_nvme_tcp_term_req_fes_str[] = {
	"Invalid PDU Header Field",
	"PDU Sequence Error",
	"Header Digest Error",
	"Data Transfer Out of Range",
	"Data Transfer Limit Exceeded",
	"Unsupported parameter",
};

static void
nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req)
{
	SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req,
		    spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]);
	if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n",
			      DGET32(c2h_term_req->fei));
	}
	/* we may also need to dump some other info here */
}

static void
nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
				     struct nvme_tcp_pdu *pdu)
{
	nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req);
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
}

static void
_nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	struct nvme_tcp_pdu *pdu;

	assert(tqpair != NULL);
	pdu = tqpair->recv_pdu;

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
		nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped);
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu);
		break;

	default:
		/* The code should not reach here */
		SPDK_ERRLOG("The code should not reach here\n");
		break;
	}
}
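
/*
 * Completion callback for the offloaded (accel framework) CRC32C calculation
 * of a received C2H data PDU. The computed digest is compared against the one
 * carried in the PDU; on mismatch the request is failed with a transient
 * transport error, and then the normal C2H data payload handling continues.
 */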
static void
tcp_data_recv_crc32_done(void *cb_arg, int status)
{
	struct nvme_tcp_req *tcp_req = cb_arg;
	struct nvme_tcp_pdu *pdu;
	struct nvme_tcp_qpair *tqpair;
	int rc;
	struct nvme_tcp_poll_group *pgroup;
	int dummy_reaped = 0;

	pdu = tcp_req->pdu;
	assert(pdu != NULL);

	tqpair = tcp_req->tqpair;
	assert(tqpair != NULL);

	if (tqpair->qpair.poll_group && !tqpair->needs_poll) {
		pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
		TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = true;
	}

	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Failed to compute the data digest for pdu=%p\n", pdu);
		tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
		goto end;
	}

	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
	if (rc == 0) {
		SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
		tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
	}

end:
	tcp_req->pdu_in_use = false;
	nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped);
}

static void
nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
			    uint32_t *reaped)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu = tqpair->recv_pdu;
	uint32_t crc32c;
	struct nvme_tcp_poll_group *tgroup;
	struct nvme_tcp_req *tcp_req = pdu->req;

	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	SPDK_DEBUGLOG(nvme, "enter\n");

	/* The request can be NULL, e.g. in case of C2HTermReq */
	if (spdk_likely(tcp_req != NULL)) {
		tcp_req->expected_datao += pdu->data_len;
	}

	/* check data digest if needed */
	if (pdu->ddgst_enable) {
		/* But if the data digest is enabled, tcp_req cannot be NULL */
		assert(tcp_req != NULL);
		tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
		/* Only support this limited case for the first step */
		if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) &&
		    (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) &&
		    spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)
				&& !tcp_req->pdu_in_use)) {

			tcp_req->pdu_in_use = true;
			tcp_req->pdu->hdr = pdu->hdr;
			tcp_req->pdu->req = tcp_req;
			memcpy(tcp_req->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest));
			memcpy(tcp_req->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt);
			tcp_req->pdu->data_iovcnt = pdu->data_iovcnt;
			tcp_req->pdu->data_len = pdu->data_len;

			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
			tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx,
					&tcp_req->pdu->data_digest_crc32, tcp_req->pdu->data_iov,
					tcp_req->pdu->data_iovcnt, 0, tcp_data_recv_crc32_done, tcp_req);
			return;
		}

		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		crc32c = crc32c ^ SPDK_CRC32C_XOR;
		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
		if (rc == 0) {
			SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
			tcp_req = pdu->req;
			assert(tcp_req != NULL);
			tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
		}
	}

	_nvme_tcp_pdu_payload_handle(tqpair, reaped);
}

static void
nvme_tcp_send_icreq_complete(void *cb_arg)
{
	struct nvme_tcp_qpair *tqpair = cb_arg;

	SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id);

	tqpair->flags.icreq_send_ack = true;

	if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) {
		SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id);
		tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
	}
}

static void
nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
		       struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	int recv_buf_size;

	/* Only PFV 0 is defined currently */
	if (ic_resp->pfv != 0) {
		SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
		goto end;
	}

	if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
		SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
			    ic_resp->maxh2cdata);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
		goto end;
	}
	tqpair->maxh2cdata = ic_resp->maxh2cdata;

	if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
		SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
		goto end;
	}
	tqpair->cpda = ic_resp->cpda;

	tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
	tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
	SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable);
	SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable);

	/* Now that we know whether digests are enabled, properly size the receive buffer to
	 * handle several incoming 4K read commands according to the
	 * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR parameter. */
	recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);

	if (tqpair->flags.host_hdgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (tqpair->flags.host_ddgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
		SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
			     tqpair,
			     recv_buf_size);
		/* Not fatal. */
	}

	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	if (!tqpair->flags.icreq_send_ack) {
		tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
		SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id);
		return;
	}

	tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
	return;
end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
				 uint32_t *reaped)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp;
	uint32_t cid, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(nvme, "enter\n");
	cid = capsule_resp->rccqe.cid;
	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);

	if (!tcp_req) {
		SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe);
		goto end;
	}

	assert(tcp_req->req != NULL);

	tcp_req->rsp = capsule_resp->rccqe;
	tcp_req->ordering.bits.data_recv = 1;

	/* Recv the pdu again */
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	if (nvme_tcp_req_complete_safe(tcp_req)) {
		(*reaped)++;
	}

	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
		SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
		goto end;
	}

	/* set the data buffer */
	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
			      c2h_term_req->common.plen - c2h_term_req->common.hlen);
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;
end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	int flags = c2h_data->common.flags;

	SPDK_DEBUGLOG(nvme, "enter\n");
	SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
		      tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
	tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
	if (!tcp_req) {
		SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
		goto end;
	}

	SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n",
		      tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size);
	if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) &&
			  !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) {
		SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common);
		goto end;
	}

	if (c2h_data->datal > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
			    tcp_req, c2h_data->datal, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		goto end;
	}

	if (tcp_req->expected_datao != c2h_data->datao) {
		SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datao(%u) != expected datao(%u) in tcp_req\n",
			    tcp_req, c2h_data->datao, tcp_req->expected_datao);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
		goto end;
	}

	if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > payload_size(%u) in tcp_req\n",
			    tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
		goto end;
	}

	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
				  c2h_data->datao, c2h_data->datal);
	pdu->req = tcp_req;

	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
{
	struct nvme_tcp_req *tcp_req = cb_arg;

	assert(tcp_req != NULL);

	tcp_req->ordering.bits.send_ack = 1;
	if (tcp_req->r2tl_remain) {
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		assert(tcp_req->active_r2ts > 0);
		tcp_req->active_r2ts--;
		tcp_req->state = NVME_TCP_REQ_ACTIVE;

		if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) {
			tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0;
			SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req);
			assert(tcp_req->active_r2ts > 0);
			tcp_req->ttag = tcp_req->ttag_r2t_next;
			tcp_req->r2tl_remain = tcp_req->r2tl_remain_next;
			tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
			nvme_tcp_send_h2c_data(tcp_req);
			return;
		}

		/* We also need to call this function to free the resource */
		nvme_tcp_req_complete_safe(tcp_req);
	}
}

static void
nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
	uint32_t plen, pdo, alignment;

	/* Reinit the send_ack and h2c_send_waiting_ack bits */
	tcp_req->ordering.bits.send_ack = 0;
	tcp_req->ordering.bits.h2c_send_waiting_ack = 0;
	rsp_pdu = tcp_req->pdu;
	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
	h2c_data = &rsp_pdu->hdr.h2c_data;

	h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
	plen = h2c_data->common.hlen = sizeof(*h2c_data);
	h2c_data->cccid = tcp_req->cid;
	h2c_data->ttag = tcp_req->ttag;
	h2c_data->datao = tcp_req->datao;

	h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
				  h2c_data->datao, h2c_data->datal);
	tcp_req->r2tl_remain -= h2c_data->datal;

	if (tqpair->flags.host_hdgst_enable) {
		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	rsp_pdu->padding_len = 0;
	pdo = plen;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (alignment > plen) {
			rsp_pdu->padding_len = alignment - plen;
			pdo = plen = alignment;
		}
	}

	h2c_data->common.pdo = pdo;
	plen += h2c_data->datal;
	if (tqpair->flags.host_ddgst_enable) {
		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	h2c_data->common.plen = plen;
	tcp_req->datao += h2c_data->datal;
	if (!tcp_req->r2tl_remain) {
		h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
	}

	SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
		      h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);

	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
}
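
/*
 * Handle an incoming R2T PDU: validate the requested offset and length against
 * the request's payload, enforce the negotiated maxr2t limit, and either start
 * the H2C data transfer immediately (if the capsule command send has already
 * been acked) or defer it until nvme_tcp_qpair_cmd_send_complete() runs. A
 * subsequent R2T that arrives while the previous H2C transfer is still in
 * flight is stashed in ttag_r2t_next/r2tl_remain_next and resumed from
 * nvme_tcp_qpair_h2c_data_send_complete().
 */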
static void
nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
	uint32_t cid, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(nvme, "enter\n");
	cid = r2t->cccid;
	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
	if (!tcp_req) {
		SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
		goto end;
	}

	SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
		      tqpair);

	if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
		assert(tcp_req->active_r2ts == 0);
		tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
	}

	if (tcp_req->datao != r2t->r2to) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
		goto end;
	}

	if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
			    tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
		goto end;
	}

	tcp_req->active_r2ts++;
	if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
		if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
			/* We received a subsequent R2T while we are waiting for the H2C transfer to complete */
			SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
			assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
			tcp_req->ttag_r2t_next = r2t->ttag;
			tcp_req->r2tl_remain_next = r2t->r2tl;
			tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
			return;
		} else {
			fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
			SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t,
				    tqpair);
			goto end;
		}
	}

	tcp_req->ttag = r2t->ttag;
	tcp_req->r2tl_remain = r2t->r2tl;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	if (spdk_likely(tcp_req->ordering.bits.send_ack)) {
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		tcp_req->ordering.bits.h2c_send_waiting_ack = 1;
	}

	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	struct nvme_tcp_pdu *pdu;
	int rc;
	uint32_t crc32c, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
	pdu = tqpair->recv_pdu;

	SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type);
	/* check header digest if needed */
	if (pdu->has_hdgst) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
		if (rc == 0) {
			SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
			nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
			return;
		}
	}

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
		nvme_tcp_icresp_handle(tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
		nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
		nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_R2T:
		nvme_tcp_r2t_hdr_handle(tqpair, pdu);
		break;

	default:
		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = 1;
		nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
		break;
	}
}
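
/*
 * Receive-side state machine: a PDU is read in stages
 * (AWAIT_PDU_READY -> AWAIT_PDU_CH -> AWAIT_PDU_PSH -> AWAIT_PDU_PAYLOAD),
 * and each stage may return NVME_TCP_PDU_IN_PROGRESS when the socket does not
 * yet have enough data. On a protocol error the qpair moves to the ERROR
 * state and a fatal status is reported to the caller.
 */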
static int
nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu;
	uint32_t data_len;
	enum nvme_tcp_pdu_recv_state prev_state;

	/* The loop here is to allow for several back-to-back state changes. */
	do {
		prev_state = tqpair->recv_state;
		switch (tqpair->recv_state) {
		/* If in a new state */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
			break;
		/* common header */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
			pdu = tqpair->recv_pdu;
			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
				rc = nvme_tcp_read_data(tqpair->sock,
							sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
							(uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
				if (rc < 0) {
					nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
					break;
				}
				pdu->ch_valid_bytes += rc;
				if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
					rc = NVME_TCP_PDU_IN_PROGRESS;
					goto out;
				}
			}

			/* The command header of this PDU has now been read from the socket. */
			nvme_tcp_pdu_ch_handle(tqpair);
			break;
		/* Wait for the pdu specific header */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
			pdu = tqpair->recv_pdu;
			rc = nvme_tcp_read_data(tqpair->sock,
						pdu->psh_len - pdu->psh_valid_bytes,
						(uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
			if (rc < 0) {
				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
				break;
			}

			pdu->psh_valid_bytes += rc;
			if (pdu->psh_valid_bytes < pdu->psh_len) {
				rc = NVME_TCP_PDU_IN_PROGRESS;
				goto out;
			}

			/* All headers (ch, psh, header digest) of this PDU have now been read from the socket. */
			nvme_tcp_pdu_psh_handle(tqpair, reaped);
			break;
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
			pdu = tqpair->recv_pdu;
			/* check whether the data is valid, if not we just return */
			if (!pdu->data_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			data_len = pdu->data_len;
			/* data digest */
			if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
					  tqpair->flags.host_ddgst_enable)) {
				data_len += SPDK_NVME_TCP_DIGEST_LEN;
				pdu->ddgst_enable = true;
			}

			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
			if (rc < 0) {
				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
				break;
			}

			pdu->rw_offset += rc;
			if (pdu->rw_offset < data_len) {
				rc = NVME_TCP_PDU_IN_PROGRESS;
				goto out;
			}

			assert(pdu->rw_offset == data_len);
			/* All of this PDU has now been read from the socket. */
			nvme_tcp_pdu_payload_handle(tqpair, reaped);
			break;
		case NVME_TCP_PDU_RECV_STATE_ERROR:
			rc = NVME_TCP_PDU_FATAL;
			break;
		default:
			assert(0);
			break;
		}
	} while (prev_state != tqpair->recv_state);

out:
	*reaped += tqpair->async_complete;
	tqpair->async_complete = 0;

	return rc;
}

static void
nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
{
	uint64_t t02;
	struct nvme_tcp_req *tcp_req, *tmp;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_ctrlr_process *active_proc;

	/* Don't check timeouts during controller initialization. */
	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	if (nvme_qpair_is_admin_queue(qpair)) {
		active_proc = nvme_ctrlr_get_current_process(ctrlr);
	} else {
		active_proc = qpair->active_proc;
	}

	/* Only check timeouts if the current process has a timeout callback. */
	if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
		return;
	}

	t02 = spdk_get_ticks();
	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);

		if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) {
			/*
			 * The requests are in order, so as soon as one has not timed out,
			 * stop iterating.
			 */
			break;
		}
	}
}

static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_qpair *qpair);

static int
nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	uint32_t reaped;
	int rc;

	if (qpair->poll_group == NULL) {
		rc = spdk_sock_flush(tqpair->sock);
		if (rc < 0) {
			return rc;
		}
	}

	if (max_completions == 0) {
		max_completions = tqpair->num_entries;
	} else {
		max_completions = spdk_min(max_completions, tqpair->num_entries);
	}

	reaped = 0;
	do {
		rc = nvme_tcp_read_pdu(tqpair, &reaped);
		if (rc < 0) {
			SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n",
				      errno, spdk_strerror(errno));
			goto fail;
		} else if (rc == 0) {
			/* Partial PDU is read */
			break;
		}

	} while (reaped < max_completions);

	if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
		nvme_tcp_qpair_check_timeout(qpair);
	}

	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
		rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
		if (rc != 0 && rc != -EAGAIN) {
			SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair);
			goto fail;
		} else if (rc == 0) {
			/* Once the connection is completed, we can submit queued requests */
			nvme_qpair_resubmit_requests(qpair, tqpair->num_entries);
		}
	}

	return reaped;
fail:

	/*
	 * Since admin queues take the ctrlr_lock before entering this function,
	 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
	 * to call the generic function which will take the lock for us.
	 */
1800 */ 1801 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN; 1802 1803 if (nvme_qpair_is_admin_queue(qpair)) { 1804 nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair); 1805 } else { 1806 nvme_ctrlr_disconnect_qpair(qpair); 1807 } 1808 return -ENXIO; 1809 } 1810 1811 static void 1812 nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock) 1813 { 1814 struct spdk_nvme_qpair *qpair = ctx; 1815 struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group); 1816 int32_t num_completions; 1817 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1818 1819 if (tqpair->needs_poll) { 1820 TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link); 1821 tqpair->needs_poll = false; 1822 } 1823 1824 num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair); 1825 1826 if (pgroup->num_completions >= 0 && num_completions >= 0) { 1827 pgroup->num_completions += num_completions; 1828 pgroup->stats.nvme_completions += num_completions; 1829 } else { 1830 pgroup->num_completions = -ENXIO; 1831 } 1832 } 1833 1834 static int 1835 nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair) 1836 { 1837 struct spdk_nvme_tcp_ic_req *ic_req; 1838 struct nvme_tcp_pdu *pdu; 1839 1840 pdu = tqpair->send_pdu; 1841 memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu)); 1842 ic_req = &pdu->hdr.ic_req; 1843 1844 ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ; 1845 ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req); 1846 ic_req->pfv = 0; 1847 ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1; 1848 ic_req->hpda = NVME_TCP_HPDA_DEFAULT; 1849 1850 ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest; 1851 ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest; 1852 1853 nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair); 1854 1855 tqpair->icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz()); 1856 return 0; 1857 } 1858 1859 static int 1860 nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 1861 { 1862 struct sockaddr_storage dst_addr; 1863 struct sockaddr_storage src_addr; 1864 int rc; 1865 struct nvme_tcp_qpair *tqpair; 1866 int family; 1867 long int port; 1868 struct spdk_sock_opts opts; 1869 1870 tqpair = nvme_tcp_qpair(qpair); 1871 1872 switch (ctrlr->trid.adrfam) { 1873 case SPDK_NVMF_ADRFAM_IPV4: 1874 family = AF_INET; 1875 break; 1876 case SPDK_NVMF_ADRFAM_IPV6: 1877 family = AF_INET6; 1878 break; 1879 default: 1880 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam); 1881 rc = -1; 1882 return rc; 1883 } 1884 1885 SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family); 1886 1887 memset(&dst_addr, 0, sizeof(dst_addr)); 1888 1889 SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid); 1890 rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid); 1891 if (rc != 0) { 1892 SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n"); 1893 return rc; 1894 } 1895 1896 if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) { 1897 memset(&src_addr, 0, sizeof(src_addr)); 1898 rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid); 1899 if (rc != 0) { 1900 SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n"); 1901 return rc; 1902 } 1903 } 1904 1905 port = spdk_strtol(ctrlr->trid.trsvcid, 10); 1906 if (port <= 0 || port >= INT_MAX) { 1907 SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid); 1908 
rc = -1; 1909 return rc; 1910 } 1911 1912 opts.opts_size = sizeof(opts); 1913 spdk_sock_get_default_opts(&opts); 1914 opts.priority = ctrlr->trid.priority; 1915 opts.zcopy = !nvme_qpair_is_admin_queue(qpair); 1916 if (ctrlr->opts.transport_ack_timeout) { 1917 opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout; 1918 } 1919 tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts); 1920 if (!tqpair->sock) { 1921 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", 1922 tqpair, ctrlr->trid.traddr, port); 1923 rc = -1; 1924 return rc; 1925 } 1926 1927 return 0; 1928 } 1929 1930 static int 1931 nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 1932 { 1933 struct nvme_tcp_qpair *tqpair; 1934 int rc; 1935 1936 tqpair = nvme_tcp_qpair(qpair); 1937 1938 /* Prevent this function from being called recursively, as it could lead to issues with 1939 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive 1940 * call. 1941 */ 1942 if (tqpair->flags.in_connect_poll) { 1943 return -EAGAIN; 1944 } 1945 1946 tqpair->flags.in_connect_poll = 1; 1947 1948 switch (tqpair->state) { 1949 case NVME_TCP_QPAIR_STATE_INVALID: 1950 case NVME_TCP_QPAIR_STATE_INITIALIZING: 1951 if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) { 1952 SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair); 1953 rc = -ETIMEDOUT; 1954 break; 1955 } 1956 rc = -EAGAIN; 1957 break; 1958 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 1959 rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1); 1960 if (rc < 0) { 1961 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); 1962 break; 1963 } 1964 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL; 1965 rc = -EAGAIN; 1966 break; 1967 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 1968 rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair); 1969 if (rc == 0) { 1970 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 1971 nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); 1972 } else if (rc != -EAGAIN) { 1973 SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n"); 1974 } 1975 break; 1976 case NVME_TCP_QPAIR_STATE_RUNNING: 1977 rc = 0; 1978 break; 1979 default: 1980 assert(false); 1981 rc = -EINVAL; 1982 break; 1983 } 1984 1985 tqpair->flags.in_connect_poll = 0; 1986 return rc; 1987 } 1988 1989 static int 1990 nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 1991 { 1992 int rc = 0; 1993 struct nvme_tcp_qpair *tqpair; 1994 struct nvme_tcp_poll_group *tgroup; 1995 1996 tqpair = nvme_tcp_qpair(qpair); 1997 1998 if (!tqpair->sock) { 1999 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair); 2000 if (rc < 0) { 2001 return rc; 2002 } 2003 } 2004 2005 if (qpair->poll_group) { 2006 rc = nvme_poll_group_connect_qpair(qpair); 2007 if (rc) { 2008 SPDK_ERRLOG("Unable to activate the tcp qpair.\n"); 2009 return rc; 2010 } 2011 tgroup = nvme_tcp_poll_group(qpair->poll_group); 2012 tqpair->stats = &tgroup->stats; 2013 tqpair->shared_stats = true; 2014 } else { 2015 tqpair->stats = calloc(1, sizeof(*tqpair->stats)); 2016 if (!tqpair->stats) { 2017 SPDK_ERRLOG("tcp stats memory allocation failed\n"); 2018 return -ENOMEM; 2019 } 2020 } 2021 2022 tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT; 2023 /* Explicitly set the state and recv_state of tqpair */ 2024 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 2025 if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) { 2026 
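/* Reset the receive state only when it is not already AWAIT_PDU_READY, so that the
 * transition below goes through nvme_tcp_qpair_set_recv_state() only on an actual
 * state change.
 */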
nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2027 }
2028 rc = nvme_tcp_qpair_icreq_send(tqpair);
2029 if (rc != 0) {
2030 SPDK_ERRLOG("Unable to connect the tqpair\n");
2031 return rc;
2032 }
2033
2034 return rc;
2035 }
2036
2037 static struct spdk_nvme_qpair *
2038 nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
2039 uint16_t qid, uint32_t qsize,
2040 enum spdk_nvme_qprio qprio,
2041 uint32_t num_requests, bool async)
2042 {
2043 struct nvme_tcp_qpair *tqpair;
2044 struct spdk_nvme_qpair *qpair;
2045 int rc;
2046
2047 if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
2048 SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
2049 qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
2050 return NULL;
2051 }
2052
2053 tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
2054 if (!tqpair) {
2055 SPDK_ERRLOG("failed to allocate tqpair\n");
2056 return NULL;
2057 }
2058
2059 /* Set num_entries to one less than the queue size. According to the NVMe
2060 * and NVMe-oF specs, we cannot submit qsize requests;
2061 * one slot must always remain empty.
2062 */
2063 tqpair->num_entries = qsize - 1;
2064 qpair = &tqpair->qpair;
2065 rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
2066 if (rc != 0) {
2067 free(tqpair);
2068 return NULL;
2069 }
2070
2071 rc = nvme_tcp_alloc_reqs(tqpair);
2072 if (rc) {
2073 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2074 return NULL;
2075 }
2076
2077 /* spdk_nvme_qpair_get_optimal_poll_group needs socket information,
2078 * so create the socket first when creating a qpair. */
2079 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
2080 if (rc) {
2081 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2082 return NULL;
2083 }
2084
2085 return qpair;
2086 }
2087
2088 static struct spdk_nvme_qpair *
2089 nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
2090 const struct spdk_nvme_io_qpair_opts *opts)
2091 {
2092 return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
2093 opts->io_queue_requests, opts->async_mode);
2094 }
2095
2096 /* We have to use the typedef in the function declaration to appease astyle.
*/ 2097 typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t; 2098 2099 static spdk_nvme_ctrlr_t * 2100 nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid, 2101 const struct spdk_nvme_ctrlr_opts *opts, 2102 void *devhandle) 2103 { 2104 struct nvme_tcp_ctrlr *tctrlr; 2105 int rc; 2106 2107 tctrlr = calloc(1, sizeof(*tctrlr)); 2108 if (tctrlr == NULL) { 2109 SPDK_ERRLOG("could not allocate ctrlr\n"); 2110 return NULL; 2111 } 2112 2113 tctrlr->ctrlr.opts = *opts; 2114 tctrlr->ctrlr.trid = *trid; 2115 2116 if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) { 2117 SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n", 2118 NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT); 2119 tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT; 2120 } 2121 2122 rc = nvme_ctrlr_construct(&tctrlr->ctrlr); 2123 if (rc != 0) { 2124 free(tctrlr); 2125 return NULL; 2126 } 2127 2128 tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0, 2129 tctrlr->ctrlr.opts.admin_queue_size, 0, 2130 tctrlr->ctrlr.opts.admin_queue_size, true); 2131 if (!tctrlr->ctrlr.adminq) { 2132 SPDK_ERRLOG("failed to create admin qpair\n"); 2133 nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr); 2134 return NULL; 2135 } 2136 2137 if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) { 2138 SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n"); 2139 nvme_ctrlr_destruct(&tctrlr->ctrlr); 2140 return NULL; 2141 } 2142 2143 return &tctrlr->ctrlr; 2144 } 2145 2146 static uint32_t 2147 nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) 2148 { 2149 /* TCP transport doesn't limit maximum IO transfer size. */ 2150 return UINT32_MAX; 2151 } 2152 2153 static uint16_t 2154 nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) 2155 { 2156 /* 2157 * We do not support >1 SGE in the initiator currently, 2158 * so we can only return 1 here. Once that support is 2159 * added, this should return ctrlr->cdata.nvmf_specific.msdbd 2160 * instead. 
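 *
 * A possible shape for that future change (illustrative sketch only; per the
 * NVMe-oF spec an msdbd value of 0 means the target reports no limit):
 *
 *   uint16_t msdbd = ctrlr->cdata.nvmf_specific.msdbd;
 *
 *   return msdbd ? spdk_min(msdbd, NVME_TCP_MAX_SGL_DESCRIPTORS)
 *                : NVME_TCP_MAX_SGL_DESCRIPTORS;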
2161 */ 2162 return 1; 2163 } 2164 2165 static int 2166 nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 2167 int (*iter_fn)(struct nvme_request *req, void *arg), 2168 void *arg) 2169 { 2170 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2171 struct nvme_tcp_req *tcp_req, *tmp; 2172 int rc; 2173 2174 assert(iter_fn != NULL); 2175 2176 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2177 assert(tcp_req->req != NULL); 2178 2179 rc = iter_fn(tcp_req->req, arg); 2180 if (rc != 0) { 2181 return rc; 2182 } 2183 } 2184 2185 return 0; 2186 } 2187 2188 static void 2189 nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 2190 { 2191 struct nvme_tcp_req *tcp_req, *tmp; 2192 struct spdk_nvme_cpl cpl; 2193 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2194 2195 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 2196 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2197 2198 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2199 assert(tcp_req->req != NULL); 2200 if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 2201 continue; 2202 } 2203 2204 nvme_tcp_req_complete(tcp_req, &cpl); 2205 nvme_tcp_req_put(tqpair, tcp_req); 2206 } 2207 } 2208 2209 static struct spdk_nvme_transport_poll_group * 2210 nvme_tcp_poll_group_create(void) 2211 { 2212 struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group)); 2213 2214 if (group == NULL) { 2215 SPDK_ERRLOG("Unable to allocate poll group.\n"); 2216 return NULL; 2217 } 2218 2219 TAILQ_INIT(&group->needs_poll); 2220 2221 group->sock_group = spdk_sock_group_create(group); 2222 if (group->sock_group == NULL) { 2223 free(group); 2224 SPDK_ERRLOG("Unable to allocate sock group.\n"); 2225 return NULL; 2226 } 2227 2228 return &group->group; 2229 } 2230 2231 static struct spdk_nvme_transport_poll_group * 2232 nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair) 2233 { 2234 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2235 struct spdk_sock_group *group = NULL; 2236 int rc; 2237 2238 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL); 2239 if (!rc && group != NULL) { 2240 return spdk_sock_group_get_ctx(group); 2241 } 2242 2243 return NULL; 2244 } 2245 2246 static int 2247 nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 2248 { 2249 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2250 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2251 2252 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2253 return -EPROTO; 2254 } 2255 return 0; 2256 } 2257 2258 static int 2259 nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) 2260 { 2261 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2262 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2263 2264 if (tqpair->needs_poll) { 2265 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2266 tqpair->needs_poll = false; 2267 } 2268 2269 if (tqpair->sock && group->sock_group) { 2270 if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) { 2271 return -EPROTO; 2272 } 2273 } 2274 return 0; 2275 } 2276 2277 static int 2278 nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, 2279 struct spdk_nvme_qpair *qpair) 2280 { 2281 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2282 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2283 2284 /* disconnected qpairs won't have a sock to add. 
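 * They are attached to the sock group later, when the qpair is (re)connected and
 * nvme_tcp_poll_group_connect_qpair() adds the socket.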
*/ 2285 if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) { 2286 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2287 return -EPROTO; 2288 } 2289 } 2290 2291 return 0; 2292 } 2293 2294 static int 2295 nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, 2296 struct spdk_nvme_qpair *qpair) 2297 { 2298 struct nvme_tcp_qpair *tqpair; 2299 struct nvme_tcp_poll_group *group; 2300 2301 assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs); 2302 2303 tqpair = nvme_tcp_qpair(qpair); 2304 group = nvme_tcp_poll_group(tgroup); 2305 2306 assert(tqpair->shared_stats == true); 2307 tqpair->stats = &g_dummy_stats; 2308 2309 if (tqpair->needs_poll) { 2310 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2311 tqpair->needs_poll = false; 2312 } 2313 2314 return 0; 2315 } 2316 2317 static int64_t 2318 nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, 2319 uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) 2320 { 2321 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2322 struct spdk_nvme_qpair *qpair, *tmp_qpair; 2323 struct nvme_tcp_qpair *tqpair, *tmp_tqpair; 2324 int num_events; 2325 2326 group->completions_per_qpair = completions_per_qpair; 2327 group->num_completions = 0; 2328 group->stats.polls++; 2329 2330 num_events = spdk_sock_group_poll(group->sock_group); 2331 2332 STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { 2333 disconnected_qpair_cb(qpair, tgroup->group->ctx); 2334 } 2335 2336 /* If any qpairs were marked as needing to be polled due to an asynchronous write completion 2337 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. 
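 * nvme_tcp_qpair_sock_cb() removes the qpair from needs_poll before processing
 * completions, so each entry here is handled at most once per call.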
*/ 2338 TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) { 2339 nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock); 2340 } 2341 2342 if (spdk_unlikely(num_events < 0)) { 2343 return num_events; 2344 } 2345 2346 group->stats.idle_polls += !num_events; 2347 group->stats.socket_completions += num_events; 2348 2349 return group->num_completions; 2350 } 2351 2352 static int 2353 nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) 2354 { 2355 int rc; 2356 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2357 2358 if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { 2359 return -EBUSY; 2360 } 2361 2362 rc = spdk_sock_group_close(&group->sock_group); 2363 if (rc != 0) { 2364 SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n"); 2365 assert(false); 2366 } 2367 2368 free(tgroup); 2369 2370 return 0; 2371 } 2372 2373 static int 2374 nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup, 2375 struct spdk_nvme_transport_poll_group_stat **_stats) 2376 { 2377 struct nvme_tcp_poll_group *group; 2378 struct spdk_nvme_transport_poll_group_stat *stats; 2379 2380 if (tgroup == NULL || _stats == NULL) { 2381 SPDK_ERRLOG("Invalid stats or group pointer\n"); 2382 return -EINVAL; 2383 } 2384 2385 group = nvme_tcp_poll_group(tgroup); 2386 2387 stats = calloc(1, sizeof(*stats)); 2388 if (!stats) { 2389 SPDK_ERRLOG("Can't allocate memory for TCP stats\n"); 2390 return -ENOMEM; 2391 } 2392 stats->trtype = SPDK_NVME_TRANSPORT_TCP; 2393 memcpy(&stats->tcp, &group->stats, sizeof(group->stats)); 2394 2395 *_stats = stats; 2396 2397 return 0; 2398 } 2399 2400 static void 2401 nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup, 2402 struct spdk_nvme_transport_poll_group_stat *stats) 2403 { 2404 free(stats); 2405 } 2406 2407 const struct spdk_nvme_transport_ops tcp_ops = { 2408 .name = "TCP", 2409 .type = SPDK_NVME_TRANSPORT_TCP, 2410 .ctrlr_construct = nvme_tcp_ctrlr_construct, 2411 .ctrlr_scan = nvme_fabric_ctrlr_scan, 2412 .ctrlr_destruct = nvme_tcp_ctrlr_destruct, 2413 .ctrlr_enable = nvme_tcp_ctrlr_enable, 2414 2415 .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4, 2416 .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8, 2417 .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4, 2418 .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8, 2419 .ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async, 2420 .ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async, 2421 .ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async, 2422 .ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async, 2423 2424 .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size, 2425 .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges, 2426 2427 .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair, 2428 .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair, 2429 .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair, 2430 .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair, 2431 2432 .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs, 2433 .qpair_reset = nvme_tcp_qpair_reset, 2434 .qpair_submit_request = nvme_tcp_qpair_submit_request, 2435 .qpair_process_completions = nvme_tcp_qpair_process_completions, 2436 .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests, 2437 .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers, 2438 2439 .poll_group_create = nvme_tcp_poll_group_create, 2440 .qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group, 2441 
.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair, 2442 .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair, 2443 .poll_group_add = nvme_tcp_poll_group_add, 2444 .poll_group_remove = nvme_tcp_poll_group_remove, 2445 .poll_group_process_completions = nvme_tcp_poll_group_process_completions, 2446 .poll_group_destroy = nvme_tcp_poll_group_destroy, 2447 .poll_group_get_stats = nvme_tcp_poll_group_get_stats, 2448 .poll_group_free_stats = nvme_tcp_poll_group_free_stats, 2449 }; 2450 2451 SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops); 2452
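/*
 * Illustrative only (not part of the transport): a host typically reaches this
 * transport by filling in an spdk_nvme_transport_id with trtype TCP and
 * connecting through the generic NVMe driver API. The address, port, and NQN
 * values below are placeholders.
 *
 *   struct spdk_nvme_transport_id trid = {};
 *   struct spdk_nvme_ctrlr *ctrlr;
 *
 *   spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_TCP);
 *   trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;
 *   snprintf(trid.traddr, sizeof(trid.traddr), "%s", "192.0.2.1");
 *   snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", "4420");
 *   snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", "nqn.2016-06.io.spdk:cnode1");
 *
 *   ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *   if (ctrlr == NULL) {
 *       // connection failed
 *   }
 */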