/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
 * NVMe/TCP transport
 */

#include "nvme_internal.h"

#include "spdk/endian.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/trace.h"
#include "spdk/util.h"

#include "spdk_internal/nvme_tcp.h"

#define NVME_TCP_RW_BUFFER_SIZE 131072
#define NVME_TCP_TIME_OUT_IN_SECONDS 2

#define NVME_TCP_HPDA_DEFAULT 0
#define NVME_TCP_MAX_R2T_DEFAULT 1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096

/*
 * Maximum value of transport_ack_timeout used by the TCP controller
 */
#define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31

/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;
};

struct nvme_tcp_poll_group {
	struct spdk_nvme_transport_poll_group group;
	struct spdk_sock_group *sock_group;
	uint32_t completions_per_qpair;
	int64_t num_completions;

	TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
	struct spdk_nvme_tcp_stat stats;
};

/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
	struct spdk_nvme_qpair qpair;
	struct spdk_sock *sock;

	TAILQ_HEAD(, nvme_tcp_req) free_reqs;
	TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs;

	TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
	struct nvme_tcp_pdu *recv_pdu;
	struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */
	struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */
	enum nvme_tcp_pdu_recv_state recv_state;
	struct nvme_tcp_req *tcp_reqs;
	struct spdk_nvme_tcp_stat *stats;

	uint16_t num_entries;
	uint16_t async_complete;

	struct {
		uint16_t host_hdgst_enable: 1;
		uint16_t host_ddgst_enable: 1;
		uint16_t icreq_send_ack: 1;
		uint16_t in_connect_poll: 1;
		uint16_t reserved: 12;
	} flags;

	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
	uint32_t maxh2cdata;

	uint32_t maxr2t;

	/* 0 based value, which is used to guide the padding */
	uint8_t cpda;

	enum nvme_tcp_qpair_state state;

	TAILQ_ENTRY(nvme_tcp_qpair) link;
	bool needs_poll;

	uint64_t icreq_timeout_tsc;

	bool shared_stats;
};

enum nvme_tcp_req_state {
	NVME_TCP_REQ_FREE,
	NVME_TCP_REQ_ACTIVE,
	NVME_TCP_REQ_ACTIVE_R2T,
};

struct nvme_tcp_req {
	struct nvme_request *req;
	enum nvme_tcp_req_state state;
	uint16_t cid;
	uint16_t ttag;
	uint32_t datao;
	uint32_t expected_datao;
	uint32_t r2tl_remain;
	uint32_t active_r2ts;
	/* Used to hold a value received from a subsequent R2T while we are still
	 * waiting for the H2C transfer to complete */
	uint16_t ttag_r2t_next;
	bool in_capsule_data;
	bool pdu_in_use;
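	/* Completion is deferred until both the send_ack and data_recv bits below are set;
	 * nvme_tcp_req_complete_safe() checks these bits before the request is freed. */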
	/* Used to track whether the req can be safely freed */
	union {
		uint8_t raw;
		struct {
			/* The last send operation completed - kernel released send buffer */
			uint8_t send_ack : 1;
			/* Data transfer completed - target sent a response or the last data PDU */
			uint8_t data_recv : 1;
			/* tcp_req is waiting for completion of the previous send operation (buffer
			 * reclaim notification from kernel) before sending H2C data */
			uint8_t h2c_send_waiting_ack : 1;
			/* tcp_req received a subsequent R2T while it is still waiting for send_ack.
			 * Rare case that occurs when dealing with a target that can send several R2T
			 * requests. The SPDK TCP target sends 1 R2T for the whole data buffer. */
			uint8_t r2t_waiting_h2c_complete : 1;
			uint8_t reserved : 4;
		} bits;
	} ordering;
	struct nvme_tcp_pdu *pdu;
	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t iovcnt;
	/* Used to hold a value received from a subsequent R2T while we are still
	 * waiting for the H2C ack */
	uint32_t r2tl_remain_next;
	struct nvme_tcp_qpair *tqpair;
	TAILQ_ENTRY(nvme_tcp_req) link;
	struct spdk_nvme_cpl rsp;
};

static struct spdk_nvme_tcp_stat g_dummy_stats = {};

static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu);
static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair,
				  struct spdk_nvme_cpl *rsp, bool print_on_error);

static inline struct nvme_tcp_qpair *
nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
{
	assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
}

static inline struct nvme_tcp_poll_group *
nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
{
	return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
}

static inline struct nvme_tcp_ctrlr *
nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
}

static struct nvme_tcp_req *
nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_req *tcp_req;

	tcp_req = TAILQ_FIRST(&tqpair->free_reqs);
	if (!tcp_req) {
		return NULL;
	}

	assert(tcp_req->state == NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_ACTIVE;
	TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
	tcp_req->datao = 0;
	tcp_req->expected_datao = 0;
	tcp_req->req = NULL;
	tcp_req->in_capsule_data = false;
	tcp_req->pdu_in_use = false;
	tcp_req->r2tl_remain = 0;
	tcp_req->r2tl_remain_next = 0;
	tcp_req->active_r2ts = 0;
	tcp_req->iovcnt = 0;
	tcp_req->ordering.raw = 0;
	memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu));
	memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl));
	TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);

	return tcp_req;
}

static void
nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	assert(tcp_req->state != NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_FREE;
	TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link);
}

static int
nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	ret = getaddrinfo(addr, service, &hints, &res);
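	/* Note: getaddrinfo() returns an EAI_* code rather than an errno value; it is
	 * translated with gai_strerror() for logging and handed back to the caller unchanged. */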
SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 231 return ret; 232 } 233 234 if (res->ai_addrlen > sizeof(*sa)) { 235 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 236 ret = -EINVAL; 237 } else { 238 memcpy(sa, res->ai_addr, res->ai_addrlen); 239 } 240 241 freeaddrinfo(res); 242 return ret; 243 } 244 245 static void 246 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 247 { 248 free(tqpair->tcp_reqs); 249 tqpair->tcp_reqs = NULL; 250 251 spdk_free(tqpair->send_pdus); 252 tqpair->send_pdus = NULL; 253 } 254 255 static int 256 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 257 { 258 uint16_t i; 259 struct nvme_tcp_req *tcp_req; 260 261 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 262 if (tqpair->tcp_reqs == NULL) { 263 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 264 goto fail; 265 } 266 267 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 268 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 269 0x1000, NULL, 270 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 271 272 if (tqpair->send_pdus == NULL) { 273 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 274 goto fail; 275 } 276 277 TAILQ_INIT(&tqpair->send_queue); 278 TAILQ_INIT(&tqpair->free_reqs); 279 TAILQ_INIT(&tqpair->outstanding_reqs); 280 for (i = 0; i < tqpair->num_entries; i++) { 281 tcp_req = &tqpair->tcp_reqs[i]; 282 tcp_req->cid = i; 283 tcp_req->tqpair = tqpair; 284 tcp_req->pdu = &tqpair->send_pdus[i]; 285 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 286 } 287 288 tqpair->send_pdu = &tqpair->send_pdus[i]; 289 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 290 291 return 0; 292 fail: 293 nvme_tcp_free_reqs(tqpair); 294 return -ENOMEM; 295 } 296 297 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 298 299 static void 300 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 301 { 302 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 303 struct nvme_tcp_pdu *pdu; 304 int rc; 305 struct nvme_tcp_poll_group *group; 306 307 if (tqpair->needs_poll) { 308 group = nvme_tcp_poll_group(qpair->poll_group); 309 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 310 tqpair->needs_poll = false; 311 } 312 313 rc = spdk_sock_close(&tqpair->sock); 314 315 if (tqpair->sock != NULL) { 316 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 317 /* Set it to NULL manually */ 318 tqpair->sock = NULL; 319 } 320 321 /* clear the send_queue */ 322 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 323 pdu = TAILQ_FIRST(&tqpair->send_queue); 324 /* Remove the pdu from the send_queue to prevent the wrong sending out 325 * in the next round connection 326 */ 327 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 328 } 329 330 nvme_tcp_qpair_abort_reqs(qpair, 0); 331 nvme_transport_ctrlr_disconnect_qpair_done(qpair); 332 } 333 334 static int 335 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 336 { 337 struct nvme_tcp_qpair *tqpair; 338 339 assert(qpair != NULL); 340 nvme_tcp_qpair_abort_reqs(qpair, 0); 341 nvme_qpair_deinit(qpair); 342 tqpair = nvme_tcp_qpair(qpair); 343 nvme_tcp_free_reqs(tqpair); 344 if (!tqpair->shared_stats) { 345 free(tqpair->stats); 346 } 347 free(tqpair); 348 349 return 0; 350 } 351 352 static int 353 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 354 { 355 return 0; 356 } 357 358 static int 359 
static int
nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);

	if (ctrlr->adminq) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	free(tctrlr);

	return 0;
}

static void
_pdu_write_done(void *cb_arg, int err)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct nvme_tcp_qpair *tqpair = pdu->qpair;
	struct nvme_tcp_poll_group *pgroup;

	/* If there are queued requests, we assume they are queued because they are waiting
	 * for resources to be released. Those resources are almost certainly released in
	 * response to a PDU completing here. However, to attempt to make forward progress
	 * the qpair needs to be polled and we can't rely on another network event to make
	 * that happen. Add it to a list of qpairs to poll regardless of network activity
	 * here.
	 * Besides, when the tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or
	 * NVME_TCP_QPAIR_STATE_INITIALIZING, it also needs to be added to the needs_poll
	 * list, so that forward progress is made in case the resources are released after
	 * the icreq's or CONNECT's response is processed. */
	if (tqpair->qpair.poll_group && !tqpair->needs_poll &&
	    (!STAILQ_EMPTY(&tqpair->qpair.queued_req) ||
	     tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL ||
	     tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
		pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);

		TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = true;
	}

	TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);

	if (err != 0) {
		nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}

static void
_tcp_write_pdu(struct nvme_tcp_pdu *pdu)
{
	uint32_t mapped_length = 0;
	struct nvme_tcp_qpair *tqpair = pdu->qpair;

	pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu,
			       (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable,
			       &mapped_length);
	pdu->sock_req.cb_fn = _pdu_write_done;
	pdu->sock_req.cb_arg = pdu;
	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
	tqpair->stats->submitted_requests++;
	spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
}

static void
data_crc32_accel_done(void *cb_arg, int status)
{
	struct nvme_tcp_pdu *pdu = cb_arg;

	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Failed to compute the data digest for pdu=%p\n", pdu);
		_pdu_write_done(pdu, status);
		return;
	}

	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);

	_tcp_write_pdu(pdu);
}

static void
pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_qpair *tqpair = pdu->qpair;
	uint32_t crc32c;
	struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] &&
	    tqpair->flags.host_ddgst_enable) {
		/* Only support this limited case for the first step */
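		/* The digest is offloaded to the accel framework only when the qpair is fully
		 * connected, the poll group provides submit_accel_crc32c(), and the payload is
		 * DIF-free and a multiple of SPDK_NVME_TCP_DIGEST_ALIGNMENT; otherwise it is
		 * computed on the CPU just below. */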
		if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) &&
		    (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) &&
		    spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0))) {
			tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx,
					&pdu->data_digest_crc32, pdu->data_iov,
					pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
			return;
		}

		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		crc32c = crc32c ^ SPDK_CRC32C_XOR;
		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
	}

	_tcp_write_pdu(pdu);
}

static int
nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;
	pdu->qpair = tqpair;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	pdu_data_crc32_compute(pdu);

	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	struct nvme_request *req = tcp_req->req;

	tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset;
	tcp_req->iov[0].iov_len = req->payload_size;
	tcp_req->iovcnt = 1;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	int rc;
	uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
	struct nvme_request *req = tcp_req->req;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
	remaining_size = req->payload_size;

	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
					      &length);
		if (rc) {
			return -1;
		}

		length = spdk_min(length, remaining_size);
		tcp_req->iov[iovcnt].iov_len = length;
		remaining_size -= length;
		iovcnt++;
	} while (remaining_size > 0 && iovcnt < max_num_sgl);

	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here.
	 */
	if (remaining_size > 0) {
		SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n",
			    tcp_req, iovcnt, remaining_size);
		return -1;
	}

	tcp_req->iovcnt = iovcnt;

	return 0;
}

static int
nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
		  struct nvme_tcp_req *tcp_req)
{
	struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr;
	int rc = 0;
	enum spdk_nvme_data_transfer xfer;
	uint32_t max_in_capsule_data_size;

	tcp_req->req = req;
	req->cmd.cid = tcp_req->cid;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT;
	req->cmd.dptr.sgl1.unkeyed.length = req->payload_size;

	if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
		rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
	} else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
		rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
	} else {
		rc = -1;
	}

	if (rc) {
		return rc;
	}

	if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) {
		struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;

		xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
	} else {
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
	}

	if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		max_in_capsule_data_size = ctrlr->ioccsz_bytes;
		if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) {
			max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
		}

		if (req->payload_size <= max_in_capsule_data_size) {
			req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
			req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
			req->cmd.dptr.sgl1.address = 0;
			tcp_req->in_capsule_data = true;
		}
	}

	return 0;
}

static inline bool
nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req)
{
	if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) {
		return false;
	}

	assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
	assert(tcp_req->tqpair != NULL);
	assert(tcp_req->req != NULL);

	SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair);

	if (!tcp_req->tqpair->qpair.in_completion_context) {
		tcp_req->tqpair->async_complete++;
	}

	nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true);
	return true;
}

static void
nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
{
	struct nvme_tcp_req *tcp_req = cb_arg;

	SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid,
		      tcp_req->tqpair->qpair.id);
	tcp_req->ordering.bits.send_ack = 1;
	/* Handle the r2t case */
	if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) {
		SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req);
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		nvme_tcp_req_complete_safe(tcp_req);
	}
}

static int
nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair,
				struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_pdu *pdu;
	struct spdk_nvme_tcp_cmd *capsule_cmd;
	uint32_t plen = 0, alignment;
	uint8_t pdo;

	SPDK_DEBUGLOG(nvme, "enter\n");
	pdu = tcp_req->pdu;
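	/* The capsule command PDU is assembled below: common header and SQE first, then
	 * optional header/data digest lengths are added to plen, and for in-capsule writes
	 * the payload described by tcp_req->iov is attached with any CPDA padding. */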
capsule_cmd = &pdu->hdr.capsule_cmd; 666 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 667 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 668 capsule_cmd->ccsqe = tcp_req->req->cmd; 669 670 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 671 672 if (tqpair->flags.host_hdgst_enable) { 673 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 674 tcp_req); 675 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 676 plen += SPDK_NVME_TCP_DIGEST_LEN; 677 } 678 679 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 680 goto end; 681 } 682 683 pdo = plen; 684 pdu->padding_len = 0; 685 if (tqpair->cpda) { 686 alignment = (tqpair->cpda + 1) << 2; 687 if (alignment > plen) { 688 pdu->padding_len = alignment - plen; 689 pdo = alignment; 690 plen = alignment; 691 } 692 } 693 694 capsule_cmd->common.pdo = pdo; 695 plen += tcp_req->req->payload_size; 696 if (tqpair->flags.host_ddgst_enable) { 697 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 698 plen += SPDK_NVME_TCP_DIGEST_LEN; 699 } 700 701 tcp_req->datao = 0; 702 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 703 0, tcp_req->req->payload_size); 704 end: 705 capsule_cmd->common.plen = plen; 706 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 707 708 } 709 710 static int 711 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 712 struct nvme_request *req) 713 { 714 struct nvme_tcp_qpair *tqpair; 715 struct nvme_tcp_req *tcp_req; 716 717 tqpair = nvme_tcp_qpair(qpair); 718 assert(tqpair != NULL); 719 assert(req != NULL); 720 721 tcp_req = nvme_tcp_req_get(tqpair); 722 if (!tcp_req) { 723 tqpair->stats->queued_requests++; 724 /* Inform the upper layer to try again later. 
*/ 725 return -EAGAIN; 726 } 727 728 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 729 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 730 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 731 nvme_tcp_req_put(tqpair, tcp_req); 732 return -1; 733 } 734 735 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 736 } 737 738 static int 739 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 740 { 741 return 0; 742 } 743 744 static void 745 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 746 struct nvme_tcp_qpair *tqpair, 747 struct spdk_nvme_cpl *rsp, 748 bool print_on_error) 749 { 750 struct spdk_nvme_cpl cpl; 751 spdk_nvme_cmd_cb user_cb; 752 void *user_cb_arg; 753 struct spdk_nvme_qpair *qpair; 754 struct nvme_request *req; 755 bool error, print_error; 756 757 assert(tcp_req->req != NULL); 758 req = tcp_req->req; 759 760 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 761 memcpy(&cpl, rsp, sizeof(cpl)); 762 user_cb = req->cb_fn; 763 user_cb_arg = req->cb_arg; 764 qpair = req->qpair; 765 766 error = spdk_nvme_cpl_is_error(rsp); 767 print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 768 769 if (print_error) { 770 spdk_nvme_qpair_print_command(qpair, &req->cmd); 771 } 772 773 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 774 spdk_nvme_qpair_print_completion(qpair, rsp); 775 } 776 777 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 778 nvme_tcp_req_put(tqpair, tcp_req); 779 nvme_free_request(req); 780 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 781 } 782 783 static void 784 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 785 { 786 struct nvme_tcp_req *tcp_req, *tmp; 787 struct spdk_nvme_cpl cpl = {}; 788 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 789 790 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 791 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 792 cpl.status.dnr = dnr; 793 794 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 795 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true); 796 } 797 } 798 799 static void 800 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 801 enum nvme_tcp_pdu_recv_state state) 802 { 803 if (tqpair->recv_state == state) { 804 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 805 tqpair, state); 806 return; 807 } 808 809 tqpair->recv_state = state; 810 switch (state) { 811 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 812 case NVME_TCP_PDU_RECV_STATE_ERROR: 813 memset(tqpair->recv_pdu, 0, sizeof(struct nvme_tcp_pdu)); 814 break; 815 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 816 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 817 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 818 default: 819 break; 820 } 821 } 822 823 static void 824 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 825 { 826 struct nvme_tcp_qpair *tqpair = cb_arg; 827 828 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 829 } 830 831 static void 832 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 833 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 834 { 835 struct nvme_tcp_pdu *rsp_pdu; 836 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 837 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 838 uint8_t copy_len; 839 840 rsp_pdu = tqpair->send_pdu; 841 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 842 h2c_term_req = &rsp_pdu->hdr.term_req; 843 h2c_term_req->common.pdu_type = 
SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 844 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 845 846 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 847 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 848 DSET32(&h2c_term_req->fei, error_offset); 849 } 850 851 copy_len = pdu->hdr.common.hlen; 852 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 853 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 854 } 855 856 /* Copy the error info into the buffer */ 857 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 858 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 859 860 /* Contain the header len of the wrong received pdu */ 861 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 862 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 863 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair); 864 } 865 866 static bool 867 nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair) 868 { 869 switch (tqpair->state) { 870 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND: 871 case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL: 872 case NVME_TCP_QPAIR_STATE_RUNNING: 873 return true; 874 default: 875 return false; 876 } 877 } 878 879 static void 880 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 881 { 882 struct nvme_tcp_pdu *pdu; 883 uint32_t error_offset = 0; 884 enum spdk_nvme_tcp_term_req_fes fes; 885 uint32_t expected_hlen, hd_len = 0; 886 bool plen_error = false; 887 888 pdu = tqpair->recv_pdu; 889 890 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 891 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 892 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 893 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 894 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 895 goto err; 896 } 897 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 898 if (pdu->hdr.common.plen != expected_hlen) { 899 plen_error = true; 900 } 901 } else { 902 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) { 903 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n"); 904 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 905 goto err; 906 } 907 908 switch (pdu->hdr.common.pdu_type) { 909 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 910 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 911 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 912 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 913 } 914 915 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 916 plen_error = true; 917 } 918 break; 919 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 920 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 921 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 922 plen_error = true; 923 } 924 break; 925 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 926 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 927 if ((pdu->hdr.common.plen <= expected_hlen) || 928 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 929 plen_error = true; 930 } 931 break; 932 case SPDK_NVME_TCP_PDU_TYPE_R2T: 933 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 934 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 935 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 936 } 937 938 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 939 plen_error = true; 940 } 941 break; 942 943 default: 944 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", 
tqpair->recv_pdu->hdr.common.pdu_type); 945 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 946 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 947 goto err; 948 } 949 } 950 951 if (pdu->hdr.common.hlen != expected_hlen) { 952 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 953 expected_hlen, pdu->hdr.common.hlen); 954 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 955 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 956 goto err; 957 958 } else if (plen_error) { 959 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 960 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 961 goto err; 962 } else { 963 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 964 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 965 return; 966 } 967 err: 968 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 969 } 970 971 static struct nvme_tcp_req * 972 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 973 { 974 assert(tqpair != NULL); 975 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 976 return NULL; 977 } 978 979 return &tqpair->tcp_reqs[cid]; 980 } 981 982 static void 983 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 984 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 985 { 986 struct nvme_tcp_req *tcp_req; 987 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 988 uint8_t flags; 989 990 tcp_req = pdu->req; 991 assert(tcp_req != NULL); 992 993 SPDK_DEBUGLOG(nvme, "enter\n"); 994 c2h_data = &pdu->hdr.c2h_data; 995 tcp_req->datao += pdu->data_len; 996 flags = c2h_data->common.flags; 997 998 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) { 999 if (tcp_req->datao == tcp_req->req->payload_size) { 1000 tcp_req->rsp.status.p = 0; 1001 } else { 1002 tcp_req->rsp.status.p = 1; 1003 } 1004 1005 tcp_req->rsp.cid = tcp_req->cid; 1006 tcp_req->rsp.sqid = tqpair->qpair.id; 1007 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 1008 tcp_req->ordering.bits.data_recv = 1; 1009 if (nvme_tcp_req_complete_safe(tcp_req)) { 1010 (*reaped)++; 1011 } 1012 } 1013 } 1014 } 1015 1016 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 1017 "Invalid PDU Header Field", 1018 "PDU Sequence Error", 1019 "Header Digest Error", 1020 "Data Transfer Out of Range", 1021 "Data Transfer Limit Exceeded", 1022 "Unsupported parameter", 1023 }; 1024 1025 static void 1026 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 1027 { 1028 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 1029 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 1030 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 1031 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 1032 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 1033 DGET32(c2h_term_req->fei)); 1034 } 1035 /* we may also need to dump some other info here */ 1036 } 1037 1038 static void 1039 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 1040 struct nvme_tcp_pdu *pdu) 1041 { 1042 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 1043 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1044 } 1045 1046 static void 1047 _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1048 { 1049 struct nvme_tcp_pdu *pdu; 1050 1051 assert(tqpair != NULL); 1052 pdu = tqpair->recv_pdu; 1053 1054 switch 
(pdu->hdr.common.pdu_type) { 1055 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1056 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 1057 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1058 break; 1059 1060 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1061 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 1062 break; 1063 1064 default: 1065 /* The code should not go to here */ 1066 SPDK_ERRLOG("The code should not go to here\n"); 1067 break; 1068 } 1069 } 1070 1071 static void 1072 tcp_data_recv_crc32_done(void *cb_arg, int status) 1073 { 1074 struct nvme_tcp_req *tcp_req = cb_arg; 1075 struct nvme_tcp_pdu *pdu; 1076 struct nvme_tcp_qpair *tqpair; 1077 int rc; 1078 struct nvme_tcp_poll_group *pgroup; 1079 int dummy_reaped = 0; 1080 1081 pdu = tcp_req->pdu; 1082 assert(pdu != NULL); 1083 1084 tqpair = tcp_req->tqpair; 1085 assert(tqpair != NULL); 1086 1087 if (tqpair->qpair.poll_group && !tqpair->needs_poll) { 1088 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1089 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 1090 tqpair->needs_poll = true; 1091 } 1092 1093 if (spdk_unlikely(status)) { 1094 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 1095 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1096 goto end; 1097 } 1098 1099 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 1100 rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 1101 if (rc == 0) { 1102 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1103 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1104 } 1105 1106 end: 1107 tcp_req->pdu_in_use = false; 1108 nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped); 1109 } 1110 1111 static void 1112 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 1113 uint32_t *reaped) 1114 { 1115 int rc = 0; 1116 struct nvme_tcp_pdu *pdu = tqpair->recv_pdu; 1117 uint32_t crc32c; 1118 struct nvme_tcp_poll_group *tgroup; 1119 struct nvme_tcp_req *tcp_req = pdu->req; 1120 1121 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1122 SPDK_DEBUGLOG(nvme, "enter\n"); 1123 1124 /* The request can be NULL, e.g. 
in case of C2HTermReq */ 1125 if (spdk_likely(tcp_req != NULL)) { 1126 tcp_req->expected_datao += pdu->data_len; 1127 } 1128 1129 /* check data digest if need */ 1130 if (pdu->ddgst_enable) { 1131 /* But if the data digest is enabled, tcp_req cannot be NULL */ 1132 assert(tcp_req != NULL); 1133 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 1134 /* Only suport this limitated case for the first step */ 1135 if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) && 1136 (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) && 1137 spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0) 1138 && !tcp_req->pdu_in_use)) { 1139 1140 tcp_req->pdu_in_use = true; 1141 tcp_req->pdu->hdr = pdu->hdr; 1142 tcp_req->pdu->req = tcp_req; 1143 memcpy(tcp_req->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest)); 1144 memcpy(tcp_req->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt); 1145 tcp_req->pdu->data_iovcnt = pdu->data_iovcnt; 1146 tcp_req->pdu->data_len = pdu->data_len; 1147 1148 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1149 tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx, 1150 &tcp_req->pdu->data_digest_crc32, tcp_req->pdu->data_iov, 1151 tcp_req->pdu->data_iovcnt, 0, tcp_data_recv_crc32_done, tcp_req); 1152 return; 1153 } 1154 1155 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 1156 crc32c = crc32c ^ SPDK_CRC32C_XOR; 1157 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 1158 if (rc == 0) { 1159 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1160 tcp_req = pdu->req; 1161 assert(tcp_req != NULL); 1162 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR; 1163 } 1164 } 1165 1166 _nvme_tcp_pdu_payload_handle(tqpair, reaped); 1167 } 1168 1169 static void 1170 nvme_tcp_send_icreq_complete(void *cb_arg) 1171 { 1172 struct nvme_tcp_qpair *tqpair = cb_arg; 1173 1174 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id); 1175 1176 tqpair->flags.icreq_send_ack = true; 1177 1178 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 1179 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id); 1180 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1181 } 1182 } 1183 1184 static void 1185 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1186 struct nvme_tcp_pdu *pdu) 1187 { 1188 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1189 uint32_t error_offset = 0; 1190 enum spdk_nvme_tcp_term_req_fes fes; 1191 int recv_buf_size; 1192 1193 /* Only PFV 0 is defined currently */ 1194 if (ic_resp->pfv != 0) { 1195 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1196 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1197 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1198 goto end; 1199 } 1200 1201 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1202 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1203 ic_resp->maxh2cdata); 1204 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1205 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1206 goto end; 1207 } 1208 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1209 1210 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1211 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1212 fes = 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1213 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1214 goto end; 1215 } 1216 tqpair->cpda = ic_resp->cpda; 1217 1218 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1219 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false; 1220 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1221 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1222 1223 /* Now that we know whether digests are enabled, properly size the receive buffer to 1224 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1225 * parameter. */ 1226 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1227 1228 if (tqpair->flags.host_hdgst_enable) { 1229 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1230 } 1231 1232 if (tqpair->flags.host_ddgst_enable) { 1233 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1234 } 1235 1236 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1237 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1238 tqpair, 1239 recv_buf_size); 1240 /* Not fatal. */ 1241 } 1242 1243 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1244 1245 if (!tqpair->flags.icreq_send_ack) { 1246 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1247 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1248 return; 1249 } 1250 1251 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND; 1252 return; 1253 end: 1254 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1255 } 1256 1257 static void 1258 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1259 uint32_t *reaped) 1260 { 1261 struct nvme_tcp_req *tcp_req; 1262 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1263 uint32_t cid, error_offset = 0; 1264 enum spdk_nvme_tcp_term_req_fes fes; 1265 1266 SPDK_DEBUGLOG(nvme, "enter\n"); 1267 cid = capsule_resp->rccqe.cid; 1268 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1269 1270 if (!tcp_req) { 1271 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1272 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1273 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1274 goto end; 1275 } 1276 1277 assert(tcp_req->req != NULL); 1278 1279 tcp_req->rsp = capsule_resp->rccqe; 1280 tcp_req->ordering.bits.data_recv = 1; 1281 1282 /* Recv the pdu again */ 1283 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1284 1285 if (nvme_tcp_req_complete_safe(tcp_req)) { 1286 (*reaped)++; 1287 } 1288 1289 return; 1290 1291 end: 1292 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1293 } 1294 1295 static void 1296 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1297 struct nvme_tcp_pdu *pdu) 1298 { 1299 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1300 uint32_t error_offset = 0; 1301 enum spdk_nvme_tcp_term_req_fes fes; 1302 1303 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1304 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1305 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1306 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1307 goto end; 1308 } 1309 1310 /* set the 
data buffer */ 1311 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1312 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1313 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1314 return; 1315 end: 1316 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1317 } 1318 1319 static void 1320 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1321 { 1322 struct nvme_tcp_req *tcp_req; 1323 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1324 uint32_t error_offset = 0; 1325 enum spdk_nvme_tcp_term_req_fes fes; 1326 int flags = c2h_data->common.flags; 1327 1328 SPDK_DEBUGLOG(nvme, "enter\n"); 1329 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1330 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1331 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1332 if (!tcp_req) { 1333 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1334 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1335 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1336 goto end; 1337 1338 } 1339 1340 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n", 1341 tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size); 1342 1343 if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) && 1344 !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) { 1345 SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data); 1346 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1347 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common); 1348 goto end; 1349 } 1350 1351 if (c2h_data->datal > tcp_req->req->payload_size) { 1352 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1353 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1354 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1355 goto end; 1356 } 1357 1358 if (tcp_req->expected_datao != c2h_data->datao) { 1359 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n", 1360 tcp_req, c2h_data->datao, tcp_req->expected_datao); 1361 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1362 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1363 goto end; 1364 } 1365 1366 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1367 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1368 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1369 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1370 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1371 goto end; 1372 1373 } 1374 1375 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1376 c2h_data->datao, c2h_data->datal); 1377 pdu->req = tcp_req; 1378 1379 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1380 return; 1381 1382 end: 1383 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1384 } 1385 1386 static void 1387 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1388 { 1389 struct nvme_tcp_req *tcp_req = cb_arg; 1390 1391 assert(tcp_req != NULL); 1392 1393 tcp_req->ordering.bits.send_ack = 1; 1394 if (tcp_req->r2tl_remain) { 1395 nvme_tcp_send_h2c_data(tcp_req); 1396 } else { 1397 assert(tcp_req->active_r2ts > 0); 1398 
tcp_req->active_r2ts--; 1399 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1400 1401 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1402 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1403 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1404 assert(tcp_req->active_r2ts > 0); 1405 tcp_req->ttag = tcp_req->ttag_r2t_next; 1406 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1407 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1408 nvme_tcp_send_h2c_data(tcp_req); 1409 return; 1410 } 1411 1412 /* Need also call this function to free the resource */ 1413 nvme_tcp_req_complete_safe(tcp_req); 1414 } 1415 } 1416 1417 static void 1418 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1419 { 1420 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1421 struct nvme_tcp_pdu *rsp_pdu; 1422 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1423 uint32_t plen, pdo, alignment; 1424 1425 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1426 tcp_req->ordering.bits.send_ack = 0; 1427 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1428 rsp_pdu = tcp_req->pdu; 1429 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1430 h2c_data = &rsp_pdu->hdr.h2c_data; 1431 1432 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1433 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1434 h2c_data->cccid = tcp_req->cid; 1435 h2c_data->ttag = tcp_req->ttag; 1436 h2c_data->datao = tcp_req->datao; 1437 1438 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1439 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1440 h2c_data->datao, h2c_data->datal); 1441 tcp_req->r2tl_remain -= h2c_data->datal; 1442 1443 if (tqpair->flags.host_hdgst_enable) { 1444 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1445 plen += SPDK_NVME_TCP_DIGEST_LEN; 1446 } 1447 1448 rsp_pdu->padding_len = 0; 1449 pdo = plen; 1450 if (tqpair->cpda) { 1451 alignment = (tqpair->cpda + 1) << 2; 1452 if (alignment > plen) { 1453 rsp_pdu->padding_len = alignment - plen; 1454 pdo = plen = alignment; 1455 } 1456 } 1457 1458 h2c_data->common.pdo = pdo; 1459 plen += h2c_data->datal; 1460 if (tqpair->flags.host_ddgst_enable) { 1461 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1462 plen += SPDK_NVME_TCP_DIGEST_LEN; 1463 } 1464 1465 h2c_data->common.plen = plen; 1466 tcp_req->datao += h2c_data->datal; 1467 if (!tcp_req->r2tl_remain) { 1468 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1469 } 1470 1471 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1472 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1473 1474 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1475 } 1476 1477 static void 1478 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1479 { 1480 struct nvme_tcp_req *tcp_req; 1481 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1482 uint32_t cid, error_offset = 0; 1483 enum spdk_nvme_tcp_term_req_fes fes; 1484 1485 SPDK_DEBUGLOG(nvme, "enter\n"); 1486 cid = r2t->cccid; 1487 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1488 if (!tcp_req) { 1489 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1490 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1491 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1492 goto end; 1493 } 1494 1495 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1496 tqpair); 1497 1498 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) { 1499 
assert(tcp_req->active_r2ts == 0); 1500 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1501 } 1502 1503 if (tcp_req->datao != r2t->r2to) { 1504 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1505 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to); 1506 goto end; 1507 1508 } 1509 1510 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) { 1511 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n", 1512 tcp_req, r2t->r2to, r2t->r2tl, tqpair->maxh2cdata); 1513 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1514 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl); 1515 goto end; 1516 } 1517 1518 tcp_req->active_r2ts++; 1519 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) { 1520 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) { 1521 /* We receive a subsequent R2T while we are waiting for H2C transfer to complete */ 1522 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n"); 1523 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1); 1524 tcp_req->ttag_r2t_next = r2t->ttag; 1525 tcp_req->r2tl_remain_next = r2t->r2tl; 1526 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1; 1527 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1528 return; 1529 } else { 1530 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1531 SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t, 1532 tqpair); 1533 goto end; 1534 } 1535 } 1536 1537 tcp_req->ttag = r2t->ttag; 1538 tcp_req->r2tl_remain = r2t->r2tl; 1539 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1540 1541 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1542 nvme_tcp_send_h2c_data(tcp_req); 1543 } else { 1544 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1545 } 1546 1547 return; 1548 1549 end: 1550 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1551 1552 } 1553 1554 static void 1555 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1556 { 1557 struct nvme_tcp_pdu *pdu; 1558 int rc; 1559 uint32_t crc32c, error_offset = 0; 1560 enum spdk_nvme_tcp_term_req_fes fes; 1561 1562 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1563 pdu = tqpair->recv_pdu; 1564 1565 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1566 /* check header digest if needed */ 1567 if (pdu->has_hdgst) { 1568 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1569 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1570 if (rc == 0) { 1571 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1572 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1573 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1574 return; 1575 1576 } 1577 } 1578 1579 switch (pdu->hdr.common.pdu_type) { 1580 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1581 nvme_tcp_icresp_handle(tqpair, pdu); 1582 break; 1583 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1584 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1585 break; 1586 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1587 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1588 break; 1589 1590 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1591 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1592 break; 1593 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1594 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1595 break; 1596 1597 default: 1598 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type); 1599 fes = 
SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1600 error_offset = 1; 1601 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1602 break; 1603 } 1604 1605 } 1606 1607 static int 1608 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1609 { 1610 int rc = 0; 1611 struct nvme_tcp_pdu *pdu; 1612 uint32_t data_len; 1613 enum nvme_tcp_pdu_recv_state prev_state; 1614 1615 /* The loop here is to allow for several back-to-back state changes. */ 1616 do { 1617 prev_state = tqpair->recv_state; 1618 switch (tqpair->recv_state) { 1619 /* If in a new state */ 1620 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1621 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1622 break; 1623 /* common header */ 1624 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1625 pdu = tqpair->recv_pdu; 1626 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1627 rc = nvme_tcp_read_data(tqpair->sock, 1628 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1629 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1630 if (rc < 0) { 1631 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1632 break; 1633 } 1634 pdu->ch_valid_bytes += rc; 1635 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1636 rc = NVME_TCP_PDU_IN_PROGRESS; 1637 goto out; 1638 } 1639 } 1640 1641 /* The command header of this PDU has now been read from the socket. */ 1642 nvme_tcp_pdu_ch_handle(tqpair); 1643 break; 1644 /* Wait for the pdu specific header */ 1645 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1646 pdu = tqpair->recv_pdu; 1647 rc = nvme_tcp_read_data(tqpair->sock, 1648 pdu->psh_len - pdu->psh_valid_bytes, 1649 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1650 if (rc < 0) { 1651 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1652 break; 1653 } 1654 1655 pdu->psh_valid_bytes += rc; 1656 if (pdu->psh_valid_bytes < pdu->psh_len) { 1657 rc = NVME_TCP_PDU_IN_PROGRESS; 1658 goto out; 1659 } 1660 1661 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1662 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1663 break; 1664 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1665 pdu = tqpair->recv_pdu; 1666 /* check whether the data is valid, if not we just return */ 1667 if (!pdu->data_len) { 1668 return NVME_TCP_PDU_IN_PROGRESS; 1669 } 1670 1671 data_len = pdu->data_len; 1672 /* data digest */ 1673 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1674 tqpair->flags.host_ddgst_enable)) { 1675 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1676 pdu->ddgst_enable = true; 1677 } 1678 1679 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1680 if (rc < 0) { 1681 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1682 break; 1683 } 1684 1685 pdu->rw_offset += rc; 1686 if (pdu->rw_offset < data_len) { 1687 rc = NVME_TCP_PDU_IN_PROGRESS; 1688 goto out; 1689 } 1690 1691 assert(pdu->rw_offset == data_len); 1692 /* All of this PDU has now been read from the socket. 
*/ 1693 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1694 break; 1695 case NVME_TCP_PDU_RECV_STATE_ERROR: 1696 rc = NVME_TCP_PDU_FATAL; 1697 break; 1698 default: 1699 assert(0); 1700 break; 1701 } 1702 } while (prev_state != tqpair->recv_state); 1703 1704 out: 1705 *reaped += tqpair->async_complete; 1706 tqpair->async_complete = 0; 1707 1708 return rc; 1709 } 1710 1711 static void 1712 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1713 { 1714 uint64_t t02; 1715 struct nvme_tcp_req *tcp_req, *tmp; 1716 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1717 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1718 struct spdk_nvme_ctrlr_process *active_proc; 1719 1720 /* Don't check timeouts during controller initialization. */ 1721 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1722 return; 1723 } 1724 1725 if (nvme_qpair_is_admin_queue(qpair)) { 1726 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1727 } else { 1728 active_proc = qpair->active_proc; 1729 } 1730 1731 /* Only check timeouts if the current process has a timeout callback. */ 1732 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1733 return; 1734 } 1735 1736 t02 = spdk_get_ticks(); 1737 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1738 assert(tcp_req->req != NULL); 1739 1740 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1741 /* 1742 * The requests are in order, so as soon as one has not timed out, 1743 * stop iterating. 1744 */ 1745 break; 1746 } 1747 } 1748 } 1749 1750 static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, 1751 struct spdk_nvme_qpair *qpair); 1752 1753 static int 1754 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1755 { 1756 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1757 uint32_t reaped; 1758 int rc; 1759 1760 if (qpair->poll_group == NULL) { 1761 rc = spdk_sock_flush(tqpair->sock); 1762 if (rc < 0) { 1763 return rc; 1764 } 1765 } 1766 1767 if (max_completions == 0) { 1768 max_completions = tqpair->num_entries; 1769 } else { 1770 max_completions = spdk_min(max_completions, tqpair->num_entries); 1771 } 1772 1773 reaped = 0; 1774 do { 1775 rc = nvme_tcp_read_pdu(tqpair, &reaped); 1776 if (rc < 0) { 1777 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1778 errno, spdk_strerror(errno)); 1779 goto fail; 1780 } else if (rc == 0) { 1781 /* Partial PDU is read */ 1782 break; 1783 } 1784 1785 } while (reaped < max_completions); 1786 1787 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1788 nvme_tcp_qpair_check_timeout(qpair); 1789 } 1790 1791 if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 1792 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair); 1793 if (rc != 0 && rc != -EAGAIN) { 1794 SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair); 1795 goto fail; 1796 } else if (rc == 0) { 1797 /* Once the connection is completed, we can submit queued requests */ 1798 nvme_qpair_resubmit_requests(qpair, tqpair->num_entries); 1799 } 1800 } 1801 1802 return reaped; 1803 fail: 1804 1805 /* 1806 * Since admin queues take the ctrlr_lock before entering this function, 1807 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need 1808 * to call the generic function which will take the lock for us. 
	qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
	} else {
		nvme_ctrlr_disconnect_qpair(qpair);
	}
	return -ENXIO;
}

static void
nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvme_qpair *qpair = ctx;
	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
	int32_t num_completions;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);

	if (pgroup->num_completions >= 0 && num_completions >= 0) {
		pgroup->num_completions += num_completions;
		pgroup->stats.nvme_completions += num_completions;
	} else {
		pgroup->num_completions = -ENXIO;
	}
}

static int
nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
{
	struct spdk_nvme_tcp_ic_req *ic_req;
	struct nvme_tcp_pdu *pdu;

	pdu = tqpair->send_pdu;
	memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
	ic_req = &pdu->hdr.ic_req;

	ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
	ic_req->pfv = 0;
	ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
	ic_req->hpda = NVME_TCP_HPDA_DEFAULT;

	ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
	ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;

	nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);

	tqpair->icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz());
	return 0;
}

static int
nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	int rc;
	struct nvme_tcp_qpair *tqpair;
	int family;
	long int port;
	struct spdk_sock_opts opts;

	tqpair = nvme_tcp_qpair(qpair);

	switch (ctrlr->trid.adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		family = AF_INET;
		break;
	case SPDK_NVMF_ADRFAM_IPV6:
		family = AF_INET6;
		break;
	default:
		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
		rc = -1;
		return rc;
	}

	SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

	memset(&dst_addr, 0, sizeof(dst_addr));

	SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
	rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
	if (rc != 0) {
		SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n");
		return rc;
	}

	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
		memset(&src_addr, 0, sizeof(src_addr));
		rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
		if (rc != 0) {
			SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n");
			return rc;
		}
	}

	port = spdk_strtol(ctrlr->trid.trsvcid, 10);
	if (port <= 0 || port >= INT_MAX) {
		SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid);
		rc = -1;
		return rc;
	}
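
	/* Note: transport_ack_timeout, when set, is applied to the socket as an
	 * exponential value (1 << n); a value of 0 leaves the sock layer default
	 * in place. Zero-copy sends are only requested for I/O qpairs here.
	 */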
	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ctrlr->trid.priority;
	opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
	if (ctrlr->opts.transport_ack_timeout) {
		opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
	}
	tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts);
	if (!tqpair->sock) {
		SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
			    tqpair, ctrlr->trid.traddr, port);
		rc = -1;
		return rc;
	}

	return 0;
}

static int
nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	int rc;

	tqpair = nvme_tcp_qpair(qpair);

	/* Prevent this function from being called recursively, as it could lead to issues with
	 * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive
	 * call.
	 */
	if (tqpair->flags.in_connect_poll) {
		return -EAGAIN;
	}

	tqpair->flags.in_connect_poll = 1;

	switch (tqpair->state) {
	case NVME_TCP_QPAIR_STATE_INVALID:
	case NVME_TCP_QPAIR_STATE_INITIALIZING:
		if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) {
			SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair);
			rc = -ETIMEDOUT;
			break;
		}
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
		rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
			break;
		}
		tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL;
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
		rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair);
		if (rc == 0) {
			tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		} else if (rc != -EAGAIN) {
			SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n");
		}
		break;
	case NVME_TCP_QPAIR_STATE_RUNNING:
		rc = 0;
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	tqpair->flags.in_connect_poll = 0;
	return rc;
}

static int
nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc = 0;
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *tgroup;

	tqpair = nvme_tcp_qpair(qpair);

	if (!tqpair->sock) {
		rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
		if (rc < 0) {
			return rc;
		}
	}

	if (qpair->poll_group) {
		rc = nvme_poll_group_connect_qpair(qpair);
		if (rc) {
			SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
			return rc;
		}
		tgroup = nvme_tcp_poll_group(qpair->poll_group);
		tqpair->stats = &tgroup->stats;
		tqpair->shared_stats = true;
	} else {
		tqpair->stats = calloc(1, sizeof(*tqpair->stats));
		if (!tqpair->stats) {
			SPDK_ERRLOG("tcp stats memory allocation failed\n");
			return -ENOMEM;
		}
	}

	tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
	/* Explicitly set the state and recv_state of tqpair */
	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
	if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	}
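
	/* Note: from here the connection completes asynchronously. The ICReq is written
	 * below and nvme_tcp_ctrlr_connect_qpair_poll() advances the qpair state once the
	 * ICResp arrives, or fails it with -ETIMEDOUT if icreq_timeout_tsc expires.
	 */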
	rc = nvme_tcp_qpair_icreq_send(tqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect the tqpair\n");
		return rc;
	}

	return rc;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
			    uint16_t qid, uint32_t qsize,
			    enum spdk_nvme_qprio qprio,
			    uint32_t num_requests, bool async)
{
	struct nvme_tcp_qpair *tqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
		return NULL;
	}

	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
	if (!tqpair) {
		SPDK_ERRLOG("failed to allocate tqpair\n");
		return NULL;
	}

	/* Set num_entries to one less than the queue size. According to the NVMe
	 * and NVMe-oF specs we cannot submit "queue size" requests at once;
	 * one slot must always remain empty.
	 */
	tqpair->num_entries = qsize - 1;
	qpair = &tqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
	if (rc != 0) {
		free(tqpair);
		return NULL;
	}

	rc = nvme_tcp_alloc_reqs(tqpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	/* spdk_nvme_qpair_get_optimal_poll_group() needs socket information,
	 * so create the socket first when creating a qpair.
	 */
	rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
			       const struct spdk_nvme_io_qpair_opts *opts)
{
	return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
					   opts->io_queue_requests, opts->async_mode);
}
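
/* Note: with nvme_tcp_ctrlr_create_qpair() above, an I/O qpair created with
 * io_queue_size == N ends up with num_entries == N - 1 usable slots, since one
 * queue slot always stays empty per the NVMe specification.
 */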

/* We have to use the typedef in the function declaration to appease astyle. */
typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;

static spdk_nvme_ctrlr_t *
nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
			 const struct spdk_nvme_ctrlr_opts *opts,
			 void *devhandle)
{
	struct nvme_tcp_ctrlr *tctrlr;
	int rc;

	tctrlr = calloc(1, sizeof(*tctrlr));
	if (tctrlr == NULL) {
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	tctrlr->ctrlr.opts = *opts;
	tctrlr->ctrlr.trid = *trid;

	if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
		SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
			       NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
		tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
	}

	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
	if (rc != 0) {
		free(tctrlr);
		return NULL;
	}

	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, true);
	if (!tctrlr->ctrlr.adminq) {
		SPDK_ERRLOG("failed to create admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/* TCP transport doesn't limit maximum IO transfer size. */
	return UINT32_MAX;
}

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * We do not support >1 SGE in the initiator currently,
	 * so we can only return 1 here. Once that support is
	 * added, this should return ctrlr->cdata.nvmf_specific.msdbd
	 * instead.
	 */
	return 1;
}
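
/* Note: since a single SGE is reported, each request is described by one transport
 * SGL descriptor in the command capsule; payload buffers may still be gathered
 * from multiple iovecs internally.
 */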
2170 */ 2171 return 1; 2172 } 2173 2174 static int 2175 nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 2176 int (*iter_fn)(struct nvme_request *req, void *arg), 2177 void *arg) 2178 { 2179 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2180 struct nvme_tcp_req *tcp_req, *tmp; 2181 int rc; 2182 2183 assert(iter_fn != NULL); 2184 2185 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2186 assert(tcp_req->req != NULL); 2187 2188 rc = iter_fn(tcp_req->req, arg); 2189 if (rc != 0) { 2190 return rc; 2191 } 2192 } 2193 2194 return 0; 2195 } 2196 2197 static void 2198 nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 2199 { 2200 struct nvme_tcp_req *tcp_req, *tmp; 2201 struct spdk_nvme_cpl cpl = {}; 2202 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2203 2204 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 2205 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2206 2207 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 2208 assert(tcp_req->req != NULL); 2209 if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 2210 continue; 2211 } 2212 2213 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false); 2214 } 2215 } 2216 2217 static struct spdk_nvme_transport_poll_group * 2218 nvme_tcp_poll_group_create(void) 2219 { 2220 struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group)); 2221 2222 if (group == NULL) { 2223 SPDK_ERRLOG("Unable to allocate poll group.\n"); 2224 return NULL; 2225 } 2226 2227 TAILQ_INIT(&group->needs_poll); 2228 2229 group->sock_group = spdk_sock_group_create(group); 2230 if (group->sock_group == NULL) { 2231 free(group); 2232 SPDK_ERRLOG("Unable to allocate sock group.\n"); 2233 return NULL; 2234 } 2235 2236 return &group->group; 2237 } 2238 2239 static struct spdk_nvme_transport_poll_group * 2240 nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair) 2241 { 2242 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2243 struct spdk_sock_group *group = NULL; 2244 int rc; 2245 2246 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL); 2247 if (!rc && group != NULL) { 2248 return spdk_sock_group_get_ctx(group); 2249 } 2250 2251 return NULL; 2252 } 2253 2254 static int 2255 nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 2256 { 2257 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2258 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2259 2260 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2261 return -EPROTO; 2262 } 2263 return 0; 2264 } 2265 2266 static int 2267 nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) 2268 { 2269 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 2270 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2271 2272 if (tqpair->needs_poll) { 2273 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 2274 tqpair->needs_poll = false; 2275 } 2276 2277 if (tqpair->sock && group->sock_group) { 2278 if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) { 2279 return -EPROTO; 2280 } 2281 } 2282 return 0; 2283 } 2284 2285 static int 2286 nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, 2287 struct spdk_nvme_qpair *qpair) 2288 { 2289 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 2290 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2291 2292 /* disconnected qpairs won't have a sock to add. 
	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
			return -EPROTO;
		}
	}

	return 0;
}

static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			   struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_poll_group *group;

	assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);

	tqpair = nvme_tcp_qpair(qpair);
	group = nvme_tcp_poll_group(tgroup);

	assert(tqpair->shared_stats == true);
	tqpair->stats = &g_dummy_stats;

	if (tqpair->needs_poll) {
		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
		tqpair->needs_poll = false;
	}

	return 0;
}

static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
	struct spdk_nvme_qpair *qpair, *tmp_qpair;
	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
	int num_events;

	group->completions_per_qpair = completions_per_qpair;
	group->num_completions = 0;
	group->stats.polls++;

	num_events = spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now.
	 */
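	/* Note: nvme_tcp_qpair_sock_cb() removes the qpair from needs_poll before
	 * processing completions, so each entry is handled at most once per poll.
	 */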
	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
	}

	if (spdk_unlikely(num_events < 0)) {
		return num_events;
	}

	group->stats.idle_polls += !num_events;
	group->stats.socket_completions += num_events;

	return group->num_completions;
}

static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	int rc;
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	rc = spdk_sock_group_close(&group->sock_group);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
		assert(false);
	}

	free(tgroup);

	return 0;
}

static int
nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
			      struct spdk_nvme_transport_poll_group_stat **_stats)
{
	struct nvme_tcp_poll_group *group;
	struct spdk_nvme_transport_poll_group_stat *stats;

	if (tgroup == NULL || _stats == NULL) {
		SPDK_ERRLOG("Invalid stats or group pointer\n");
		return -EINVAL;
	}

	group = nvme_tcp_poll_group(tgroup);

	stats = calloc(1, sizeof(*stats));
	if (!stats) {
		SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
		return -ENOMEM;
	}
	stats->trtype = SPDK_NVME_TRANSPORT_TCP;
	memcpy(&stats->tcp, &group->stats, sizeof(group->stats));

	*_stats = stats;

	return 0;
}

static void
nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
			       struct spdk_nvme_transport_poll_group_stat *stats)
{
	free(stats);
}

const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.ctrlr_construct = nvme_tcp_ctrlr_construct,
	.ctrlr_scan = nvme_fabric_ctrlr_scan,
	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
	.ctrlr_enable = nvme_tcp_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
	.ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
	.ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
	.ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
	.ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,

	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
	.qpair_process_completions = nvme_tcp_qpair_process_completions,
	.qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

	.poll_group_create = nvme_tcp_poll_group_create,
	.qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
	.poll_group_add = nvme_tcp_poll_group_add,
	.poll_group_remove = nvme_tcp_poll_group_remove,
	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
	.poll_group_destroy = nvme_tcp_poll_group_destroy,
	.poll_group_get_stats = nvme_tcp_poll_group_get_stats,
	.poll_group_free_stats = nvme_tcp_poll_group_free_stats,
};

SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);
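
/* Note: SPDK_NVME_TRANSPORT_REGISTER() hooks tcp_ops into the generic transport
 * layer, so probing a transport ID with trtype SPDK_NVME_TRANSPORT_TCP routes
 * controller construction through nvme_tcp_ctrlr_construct() above.
 */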