1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * NVMe/TCP transport 36 */ 37 38 #include "nvme_internal.h" 39 40 #include "spdk/endian.h" 41 #include "spdk/likely.h" 42 #include "spdk/string.h" 43 #include "spdk/stdinc.h" 44 #include "spdk/crc32.h" 45 #include "spdk/endian.h" 46 #include "spdk/assert.h" 47 #include "spdk/string.h" 48 #include "spdk/thread.h" 49 #include "spdk/trace.h" 50 #include "spdk/util.h" 51 52 #include "spdk_internal/nvme_tcp.h" 53 54 #define NVME_TCP_RW_BUFFER_SIZE 131072 55 #define NVME_TCP_TIME_OUT_IN_SECONDS 2 56 57 #define NVME_TCP_HPDA_DEFAULT 0 58 #define NVME_TCP_MAX_R2T_DEFAULT 1 59 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 60 61 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 62 struct nvme_tcp_ctrlr { 63 struct spdk_nvme_ctrlr ctrlr; 64 }; 65 66 struct nvme_tcp_poll_group { 67 struct spdk_nvme_transport_poll_group group; 68 struct spdk_sock_group *sock_group; 69 uint32_t completions_per_qpair; 70 int64_t num_completions; 71 72 TAILQ_HEAD(, nvme_tcp_qpair) needs_poll; 73 }; 74 75 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 76 struct nvme_tcp_qpair { 77 struct spdk_nvme_qpair qpair; 78 struct spdk_sock *sock; 79 80 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 81 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 82 83 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 84 struct nvme_tcp_pdu *recv_pdu; 85 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 86 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 87 enum nvme_tcp_pdu_recv_state recv_state; 88 89 struct nvme_tcp_req *tcp_reqs; 90 91 uint16_t num_entries; 92 uint16_t async_complete; 93 94 struct { 95 uint16_t host_hdgst_enable: 1; 96 uint16_t host_ddgst_enable: 1; 97 uint16_t icreq_send_ack: 1; 98 uint16_t reserved: 13; 99 } flags; 100 101 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 102 uint32_t maxh2cdata; 103 
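	/* Maximum number of R2T PDUs that may be outstanding for a single request.
	 * This is advertised to the target as a 0-based value in the ICReq
	 * (see nvme_tcp_qpair_icreq_send()).
	 */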
104 uint32_t maxr2t; 105 106 /* 0 based value, which is used to guide the padding */ 107 uint8_t cpda; 108 109 enum nvme_tcp_qpair_state state; 110 111 TAILQ_ENTRY(nvme_tcp_qpair) link; 112 bool needs_poll; 113 }; 114 115 enum nvme_tcp_req_state { 116 NVME_TCP_REQ_FREE, 117 NVME_TCP_REQ_ACTIVE, 118 NVME_TCP_REQ_ACTIVE_R2T, 119 }; 120 121 struct nvme_tcp_req { 122 struct nvme_request *req; 123 enum nvme_tcp_req_state state; 124 uint16_t cid; 125 uint16_t ttag; 126 uint32_t datao; 127 uint32_t r2tl_remain; 128 uint32_t active_r2ts; 129 /* Used to hold a value received from subsequent R2T while we are still 130 * waiting for H2C complete */ 131 uint16_t ttag_r2t_next; 132 bool in_capsule_data; 133 bool pdu_in_use; 134 /* It is used to track whether the req can be safely freed */ 135 union { 136 uint8_t raw; 137 struct { 138 /* The last send operation completed - kernel released send buffer */ 139 uint8_t send_ack : 1; 140 /* Data transfer completed - target send resp or last data bit */ 141 uint8_t data_recv : 1; 142 /* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification 143 * from kernel) to send H2C */ 144 uint8_t h2c_send_waiting_ack : 1; 145 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 146 * Rare case, actual when dealing with target that can send several R2T requests. 147 * SPDK TCP target sends 1 R2T for the whole data buffer */ 148 uint8_t r2t_waiting_h2c_complete : 1; 149 uint8_t reserved : 4; 150 } bits; 151 } ordering; 152 struct nvme_tcp_pdu *pdu; 153 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 154 uint32_t iovcnt; 155 /* Used to hold a value received from subsequent R2T while we are still 156 * waiting for H2C ack */ 157 uint32_t r2tl_remain_next; 158 struct nvme_tcp_qpair *tqpair; 159 TAILQ_ENTRY(nvme_tcp_req) link; 160 struct spdk_nvme_cpl rsp; 161 }; 162 163 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 164 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 165 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 166 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 167 168 static inline struct nvme_tcp_qpair * 169 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 170 { 171 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 172 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 173 } 174 175 static inline struct nvme_tcp_poll_group * 176 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 177 { 178 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 179 } 180 181 static inline struct nvme_tcp_ctrlr * 182 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 183 { 184 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 185 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 186 } 187 188 static struct nvme_tcp_req * 189 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 190 { 191 struct nvme_tcp_req *tcp_req; 192 193 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 194 if (!tcp_req) { 195 return NULL; 196 } 197 198 assert(tcp_req->state == NVME_TCP_REQ_FREE); 199 tcp_req->state = NVME_TCP_REQ_ACTIVE; 200 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 201 tcp_req->datao = 0; 202 tcp_req->req = NULL; 203 tcp_req->in_capsule_data = false; 204 tcp_req->pdu_in_use = false; 205 tcp_req->r2tl_remain = 0; 206 tcp_req->r2tl_remain_next = 0; 207 tcp_req->active_r2ts = 0; 208 tcp_req->iovcnt = 0; 209 tcp_req->ordering.raw = 0; 210 
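	/* Wipe the request's PDU and cached completion so that no state from a
	 * previous command leaks into this reuse of the slot.
	 */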
memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu)); 211 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 212 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 213 214 return tcp_req; 215 } 216 217 static void 218 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 219 { 220 assert(tcp_req->state != NVME_TCP_REQ_FREE); 221 tcp_req->state = NVME_TCP_REQ_FREE; 222 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 223 } 224 225 static int 226 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 227 { 228 struct addrinfo *res; 229 struct addrinfo hints; 230 int ret; 231 232 memset(&hints, 0, sizeof(hints)); 233 hints.ai_family = family; 234 hints.ai_socktype = SOCK_STREAM; 235 hints.ai_protocol = 0; 236 237 ret = getaddrinfo(addr, service, &hints, &res); 238 if (ret) { 239 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 240 return ret; 241 } 242 243 if (res->ai_addrlen > sizeof(*sa)) { 244 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 245 ret = -EINVAL; 246 } else { 247 memcpy(sa, res->ai_addr, res->ai_addrlen); 248 } 249 250 freeaddrinfo(res); 251 return ret; 252 } 253 254 static void 255 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 256 { 257 free(tqpair->tcp_reqs); 258 tqpair->tcp_reqs = NULL; 259 260 spdk_free(tqpair->send_pdus); 261 tqpair->send_pdus = NULL; 262 } 263 264 static int 265 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 266 { 267 uint16_t i; 268 struct nvme_tcp_req *tcp_req; 269 270 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 271 if (tqpair->tcp_reqs == NULL) { 272 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 273 goto fail; 274 } 275 276 /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */ 277 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu), 278 0x1000, NULL, 279 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 280 281 if (tqpair->send_pdus == NULL) { 282 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 283 goto fail; 284 } 285 286 TAILQ_INIT(&tqpair->send_queue); 287 TAILQ_INIT(&tqpair->free_reqs); 288 TAILQ_INIT(&tqpair->outstanding_reqs); 289 for (i = 0; i < tqpair->num_entries; i++) { 290 tcp_req = &tqpair->tcp_reqs[i]; 291 tcp_req->cid = i; 292 tcp_req->tqpair = tqpair; 293 tcp_req->pdu = &tqpair->send_pdus[i]; 294 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 295 } 296 297 tqpair->send_pdu = &tqpair->send_pdus[i]; 298 tqpair->recv_pdu = &tqpair->send_pdus[i + 1]; 299 300 return 0; 301 fail: 302 nvme_tcp_free_reqs(tqpair); 303 return -ENOMEM; 304 } 305 306 static void 307 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 308 { 309 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 310 struct nvme_tcp_pdu *pdu; 311 int rc; 312 struct nvme_tcp_poll_group *group; 313 314 if (tqpair->needs_poll) { 315 group = nvme_tcp_poll_group(qpair->poll_group); 316 TAILQ_REMOVE(&group->needs_poll, tqpair, link); 317 tqpair->needs_poll = false; 318 } 319 320 rc = spdk_sock_close(&tqpair->sock); 321 322 if (tqpair->sock != NULL) { 323 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc); 324 /* Set it to NULL manually */ 325 tqpair->sock = NULL; 326 } 327 328 /* clear the send_queue */ 329 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 330 pdu = TAILQ_FIRST(&tqpair->send_queue); 331 /* Remove the pdu from the send_queue to prevent the 
wrong sending out 332 * in the next round connection 333 */ 334 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 335 } 336 } 337 338 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 339 340 static int 341 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 342 { 343 struct nvme_tcp_qpair *tqpair; 344 345 assert(qpair != NULL); 346 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 347 nvme_tcp_qpair_abort_reqs(qpair, 1); 348 nvme_qpair_deinit(qpair); 349 tqpair = nvme_tcp_qpair(qpair); 350 nvme_tcp_free_reqs(tqpair); 351 free(tqpair); 352 353 return 0; 354 } 355 356 static int 357 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 358 { 359 return 0; 360 } 361 362 static int 363 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 364 { 365 struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr); 366 367 if (ctrlr->adminq) { 368 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 369 } 370 371 nvme_ctrlr_destruct_finish(ctrlr); 372 373 free(tctrlr); 374 375 return 0; 376 } 377 378 static void 379 _pdu_write_done(void *cb_arg, int err) 380 { 381 struct nvme_tcp_pdu *pdu = cb_arg; 382 struct nvme_tcp_qpair *tqpair = pdu->qpair; 383 struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 384 385 /* If there are queued requests, we assume they are queued because they are waiting 386 * for resources to be released. Those resources are almost certainly released in 387 * response to a PDU completing here. However, to attempt to make forward progress 388 * the qpair needs to be polled and we can't rely on another network event to make 389 * that happen. Add it to a list of qpairs to poll regardless of network activity 390 * here. */ 391 if (pgroup && !STAILQ_EMPTY(&tqpair->qpair.queued_req) && !tqpair->needs_poll) { 392 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link); 393 tqpair->needs_poll = true; 394 } 395 396 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 397 398 if (err != 0) { 399 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 400 return; 401 } 402 403 assert(pdu->cb_fn != NULL); 404 pdu->cb_fn(pdu->cb_arg); 405 } 406 407 static void 408 _tcp_write_pdu(struct nvme_tcp_pdu *pdu) 409 { 410 uint32_t mapped_length = 0; 411 struct nvme_tcp_qpair *tqpair = pdu->qpair; 412 413 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu, 414 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 415 &mapped_length); 416 pdu->sock_req.cb_fn = _pdu_write_done; 417 pdu->sock_req.cb_arg = pdu; 418 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 419 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 420 } 421 422 static void 423 data_crc32_accel_done(void *cb_arg, int status) 424 { 425 struct nvme_tcp_pdu *pdu = cb_arg; 426 427 if (spdk_unlikely(status)) { 428 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu); 429 _pdu_write_done(pdu, status); 430 return; 431 } 432 433 pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR; 434 MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32); 435 436 _tcp_write_pdu(pdu); 437 } 438 439 static void 440 pdu_data_crc32_compute(struct nvme_tcp_pdu *pdu) 441 { 442 struct nvme_tcp_qpair *tqpair = pdu->qpair; 443 uint32_t crc32c; 444 struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group); 445 446 /* Data Digest */ 447 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 448 tqpair->flags.host_ddgst_enable) { 449 /* Only 
support this limited case for the first step */
		if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) &&
		    (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) &&
		    spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0))) {
			tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx,
					&pdu->data_digest_crc32, pdu->data_iov,
					pdu->data_iovcnt, 0, data_crc32_accel_done, pdu);
			return;
		}

		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
	}

	_tcp_write_pdu(pdu);
}

static int
nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;

	hlen = pdu->hdr.common.hlen;
	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;
	pdu->qpair = tqpair;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	pdu_data_crc32_compute(pdu);

	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	struct nvme_request *req = tcp_req->req;

	tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset;
	tcp_req->iov[0].iov_len = req->payload_size;
	tcp_req->iovcnt = 1;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	int rc;
	uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
	struct nvme_request *req = tcp_req->req;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
	remaining_size = req->payload_size;

	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
					      &length);
		if (rc) {
			return -1;
		}

		length = spdk_min(length, remaining_size);
		tcp_req->iov[iovcnt].iov_len = length;
		remaining_size -= length;
		iovcnt++;
	} while (remaining_size > 0 && iovcnt < max_num_sgl);

	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here.
*/ 546 if (remaining_size > 0) { 547 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 548 tcp_req, iovcnt, remaining_size); 549 return -1; 550 } 551 552 tcp_req->iovcnt = iovcnt; 553 554 return 0; 555 } 556 557 static int 558 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 559 struct nvme_tcp_req *tcp_req) 560 { 561 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 562 int rc = 0; 563 enum spdk_nvme_data_transfer xfer; 564 uint32_t max_incapsule_data_size; 565 566 tcp_req->req = req; 567 req->cmd.cid = tcp_req->cid; 568 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 569 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 570 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 571 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 572 573 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 574 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 575 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 576 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 577 } else { 578 rc = -1; 579 } 580 581 if (rc) { 582 return rc; 583 } 584 585 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 586 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 587 588 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 589 } else { 590 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 591 } 592 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 593 max_incapsule_data_size = ctrlr->ioccsz_bytes; 594 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 595 max_incapsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 596 } 597 598 if (req->payload_size <= max_incapsule_data_size) { 599 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 600 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 601 req->cmd.dptr.sgl1.address = 0; 602 tcp_req->in_capsule_data = true; 603 } 604 } 605 606 return 0; 607 } 608 609 static inline bool 610 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 611 { 612 struct spdk_nvme_cpl cpl; 613 spdk_nvme_cmd_cb user_cb; 614 void *user_cb_arg; 615 struct spdk_nvme_qpair *qpair; 616 struct nvme_request *req; 617 618 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 619 return false; 620 } 621 622 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 623 assert(tcp_req->tqpair != NULL); 624 assert(tcp_req->req != NULL); 625 626 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 627 628 if (!tcp_req->tqpair->qpair.in_completion_context) { 629 tcp_req->tqpair->async_complete++; 630 } 631 632 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 633 memcpy(&cpl, &tcp_req->rsp, sizeof(cpl)); 634 user_cb = tcp_req->req->cb_fn; 635 user_cb_arg = tcp_req->req->cb_arg; 636 qpair = tcp_req->req->qpair; 637 req = tcp_req->req; 638 639 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 640 nvme_tcp_req_put(tcp_req->tqpair, tcp_req); 641 nvme_free_request(tcp_req->req); 642 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 643 644 return true; 645 } 646 647 static void 648 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 649 { 650 struct nvme_tcp_req *tcp_req = cb_arg; 651 652 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 653 tcp_req->tqpair->qpair.id); 654 tcp_req->ordering.bits.send_ack = 1; 655 /* 
Handle the r2t case */ 656 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 657 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 658 nvme_tcp_send_h2c_data(tcp_req); 659 } else { 660 nvme_tcp_req_complete_safe(tcp_req); 661 } 662 } 663 664 static int 665 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 666 struct nvme_tcp_req *tcp_req) 667 { 668 struct nvme_tcp_pdu *pdu; 669 struct spdk_nvme_tcp_cmd *capsule_cmd; 670 uint32_t plen = 0, alignment; 671 uint8_t pdo; 672 673 SPDK_DEBUGLOG(nvme, "enter\n"); 674 pdu = tcp_req->pdu; 675 676 capsule_cmd = &pdu->hdr.capsule_cmd; 677 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 678 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 679 capsule_cmd->ccsqe = tcp_req->req->cmd; 680 681 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 682 683 if (tqpair->flags.host_hdgst_enable) { 684 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 685 tcp_req); 686 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 687 plen += SPDK_NVME_TCP_DIGEST_LEN; 688 } 689 690 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 691 goto end; 692 } 693 694 pdo = plen; 695 pdu->padding_len = 0; 696 if (tqpair->cpda) { 697 alignment = (tqpair->cpda + 1) << 2; 698 if (alignment > plen) { 699 pdu->padding_len = alignment - plen; 700 pdo = alignment; 701 plen = alignment; 702 } 703 } 704 705 capsule_cmd->common.pdo = pdo; 706 plen += tcp_req->req->payload_size; 707 if (tqpair->flags.host_ddgst_enable) { 708 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 709 plen += SPDK_NVME_TCP_DIGEST_LEN; 710 } 711 712 tcp_req->datao = 0; 713 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 714 0, tcp_req->req->payload_size); 715 end: 716 capsule_cmd->common.plen = plen; 717 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 718 719 } 720 721 static int 722 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 723 struct nvme_request *req) 724 { 725 struct nvme_tcp_qpair *tqpair; 726 struct nvme_tcp_req *tcp_req; 727 728 tqpair = nvme_tcp_qpair(qpair); 729 assert(tqpair != NULL); 730 assert(req != NULL); 731 732 tcp_req = nvme_tcp_req_get(tqpair); 733 if (!tcp_req) { 734 /* Inform the upper layer to try again later. 
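		 * All tcp_req slots are currently in use.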
*/ 735 return -EAGAIN; 736 } 737 738 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 739 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 740 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 741 nvme_tcp_req_put(tqpair, tcp_req); 742 return -1; 743 } 744 745 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 746 } 747 748 static int 749 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 750 { 751 return 0; 752 } 753 754 static void 755 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 756 struct spdk_nvme_cpl *rsp) 757 { 758 struct nvme_request *req; 759 760 assert(tcp_req->req != NULL); 761 req = tcp_req->req; 762 763 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 764 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp); 765 nvme_free_request(req); 766 } 767 768 static void 769 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 770 { 771 struct nvme_tcp_req *tcp_req, *tmp; 772 struct spdk_nvme_cpl cpl; 773 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 774 775 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 776 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 777 cpl.status.dnr = dnr; 778 779 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 780 nvme_tcp_req_complete(tcp_req, &cpl); 781 nvme_tcp_req_put(tqpair, tcp_req); 782 } 783 } 784 785 static void 786 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 787 enum nvme_tcp_pdu_recv_state state) 788 { 789 if (tqpair->recv_state == state) { 790 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 791 tqpair, state); 792 return; 793 } 794 795 tqpair->recv_state = state; 796 switch (state) { 797 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 798 case NVME_TCP_PDU_RECV_STATE_ERROR: 799 memset(tqpair->recv_pdu, 0, sizeof(struct nvme_tcp_pdu)); 800 break; 801 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 802 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 803 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 804 default: 805 break; 806 } 807 } 808 809 static void 810 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 811 { 812 struct nvme_tcp_qpair *tqpair = cb_arg; 813 814 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 815 } 816 817 static void 818 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 819 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 820 { 821 struct nvme_tcp_pdu *rsp_pdu; 822 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 823 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 824 uint8_t copy_len; 825 826 rsp_pdu = tqpair->send_pdu; 827 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 828 h2c_term_req = &rsp_pdu->hdr.term_req; 829 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 830 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 831 832 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 833 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 834 DSET32(&h2c_term_req->fei, error_offset); 835 } 836 837 copy_len = pdu->hdr.common.hlen; 838 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 839 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 840 } 841 842 /* Copy the error info into the buffer */ 843 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 844 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 845 846 /* Contain the header len of the wrong received pdu */ 847 h2c_term_req->common.plen = 
				    h2c_term_req->common.hlen + copy_len;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair);
}

static void
nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_pdu *pdu;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	uint32_t expected_hlen, hd_len = 0;
	bool plen_error = false;

	pdu = tqpair->recv_pdu;

	SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type);
	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) {
		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
			SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
			goto err;
		}
		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp);
		if (pdu->hdr.common.plen != expected_hlen) {
			plen_error = true;
		}
	} else {
		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
			SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n");
			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
			goto err;
		}

		switch (pdu->hdr.common.pdu_type) {
		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
			expected_hlen = sizeof(struct spdk_nvme_tcp_rsp);
			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
			}

			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
			expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
			if (pdu->hdr.common.plen < pdu->hdr.common.pdo) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
			if ((pdu->hdr.common.plen <= expected_hlen) ||
			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_R2T:
			expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr);
			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
			}

			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
				plen_error = true;
			}
			break;

		default:
			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
			goto err;
		}
	}

	if (pdu->hdr.common.hlen != expected_hlen) {
		SPDK_ERRLOG("Expected PDU header length %u, got %u\n",
			    expected_hlen, pdu->hdr.common.hlen);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
		goto err;
	} else if (plen_error) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
		goto err;
	} else {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
		nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable);
		return;
	}
err:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static struct nvme_tcp_req *
get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid)
{
	assert(tqpair != NULL);
	if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) {
		return NULL;
	}

	return &tqpair->tcp_reqs[cid];
}

static void
nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu, uint32_t *reaped)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
	uint8_t flags;

	tcp_req = pdu->req;
	assert(tcp_req != NULL);

	SPDK_DEBUGLOG(nvme, "enter\n");
	c2h_data = &pdu->hdr.c2h_data;
	tcp_req->datao += pdu->data_len;
	flags = c2h_data->common.flags;

	if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
		if (tcp_req->datao == tcp_req->req->payload_size) {
			tcp_req->rsp.status.p = 0;
		} else {
			tcp_req->rsp.status.p = 1;
		}

		tcp_req->rsp.cid = tcp_req->cid;
		tcp_req->rsp.sqid = tqpair->qpair.id;
		tcp_req->ordering.bits.data_recv = 1;

		if (nvme_tcp_req_complete_safe(tcp_req)) {
			(*reaped)++;
		}
	}
}

static const char *spdk_nvme_tcp_term_req_fes_str[] = {
	"Invalid PDU Header Field",
	"PDU Sequence Error",
	"Header Digest Error",
	"Data Transfer Out of Range",
	"Data Transfer Limit Exceeded",
	"Unsupported parameter",
};

static void
nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req)
{
	SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req,
		    spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]);
	if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n",
			      DGET32(c2h_term_req->fei));
	}
	/* we may also need to dump some other info here */
}

static void
nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
				     struct nvme_tcp_pdu *pdu)
{
	nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req);
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
}

static void
_nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	struct nvme_tcp_pdu *pdu;

	assert(tqpair != NULL);
	pdu = tqpair->recv_pdu;

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
		nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped);
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu);
		break;

	default:
		/* The code should never reach here */
		SPDK_ERRLOG("The code should never reach here\n");
		break;
	}
}

static void
tcp_data_recv_crc32_done(void *cb_arg, int status)
{
	struct nvme_tcp_req *tcp_req = cb_arg;
	struct nvme_tcp_pdu *pdu;
	struct nvme_tcp_qpair *tqpair;
	int rc;
	struct nvme_tcp_poll_group *pgroup;
	int dummy_reaped = 0;

	pdu = tcp_req->pdu;
	assert(pdu != NULL);

	tqpair = tcp_req->tqpair;
	assert(tqpair != NULL);

	if (!tqpair->needs_poll) {
		pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
		TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
		tqpair->needs_poll = true;
	}

	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("Failed to compute the data digest for pdu=%p\n", pdu);
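		/* Complete the command with a transient transport error so the failure is
		 * visible to the upper layer instead of being silently dropped.
		 */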
		tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
		goto end;
	}

	pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
	rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
	if (rc == 0) {
		SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
		tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
	}

end:
	tcp_req->pdu_in_use = false;
	nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped);
}

static void
nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
			    uint32_t *reaped)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu;
	uint32_t crc32c;
	struct nvme_tcp_poll_group *tgroup;
	struct nvme_tcp_req *tcp_req;

	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	pdu = tqpair->recv_pdu;

	SPDK_DEBUGLOG(nvme, "enter\n");

	/* Check the data digest if needed */
	if (pdu->ddgst_enable) {
		tcp_req = pdu->req;
		tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
		/* Only support this limited case for the first step */
		if ((nvme_qpair_get_state(&tqpair->qpair) >= NVME_QPAIR_CONNECTED) &&
		    (tgroup != NULL && tgroup->group.group->accel_fn_table.submit_accel_crc32c) &&
		    spdk_likely(!pdu->dif_ctx && (pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT == 0)
				&& !tcp_req->pdu_in_use)) {

			tcp_req->pdu_in_use = true;
			tcp_req->pdu->hdr = pdu->hdr;
			tcp_req->pdu->req = tcp_req;
			memcpy(tcp_req->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest));
			memcpy(tcp_req->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt);
			tcp_req->pdu->data_iovcnt = pdu->data_iovcnt;
			tcp_req->pdu->data_len = pdu->data_len;

			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
			tgroup->group.group->accel_fn_table.submit_accel_crc32c(tgroup->group.group->ctx,
					&tcp_req->pdu->data_digest_crc32, tcp_req->pdu->data_iov,
					tcp_req->pdu->data_iovcnt, 0, tcp_data_recv_crc32_done, tcp_req);
			return;
		}

		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
		if (rc == 0) {
			SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
			tcp_req = pdu->req;
			assert(tcp_req != NULL);
			tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
		}
	}

	_nvme_tcp_pdu_payload_handle(tqpair, reaped);
}

static void
nvme_tcp_send_icreq_complete(void *cb_arg)
{
	struct nvme_tcp_qpair *tqpair = cb_arg;

	SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id);

	tqpair->flags.icreq_send_ack = true;

	if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) {
		SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id);
		tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
	}
}

static void
nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
		       struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	int recv_buf_size;

	/* Only PFV 0 is defined currently */
	if (ic_resp->pfv != 0) {
		SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
		goto end;
	}

	if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
		SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
			    ic_resp->maxh2cdata);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
		goto end;
	}
	tqpair->maxh2cdata = ic_resp->maxh2cdata;

	if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
		SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
		goto end;
	}
	tqpair->cpda = ic_resp->cpda;

	tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
	tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
	SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable);
	SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable);

	/* Now that we know whether digests are enabled, properly size the receive buffer to
	 * handle several incoming 4K read commands according to the
	 * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR parameter. */
	recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);

	if (tqpair->flags.host_hdgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (tqpair->flags.host_ddgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
		SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
			     tqpair,
			     recv_buf_size);
		/* Not fatal.
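		 * The socket simply keeps its existing receive buffer size.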
		 */
	}

	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	if (!tqpair->flags.icreq_send_ack) {
		tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
		SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id);
		return;
	}

	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
	return;
end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
				 uint32_t *reaped)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp;
	uint32_t cid, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(nvme, "enter\n");
	cid = capsule_resp->rccqe.cid;
	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);

	if (!tcp_req) {
		SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe);
		goto end;
	}

	assert(tcp_req->req != NULL);

	tcp_req->rsp = capsule_resp->rccqe;
	tcp_req->ordering.bits.data_recv = 1;

	/* Prepare to receive the next PDU */
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	if (nvme_tcp_req_complete_safe(tcp_req)) {
		(*reaped)++;
	}

	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for c2h_term_req pdu=%p\n", pdu);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
		goto end;
	}

	/* set the data buffer */
	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
			      c2h_term_req->common.plen - c2h_term_req->common.hlen);
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;
end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(nvme, "enter\n");
	SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
		      tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
	tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
	if (!tcp_req) {
		SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
		goto end;
	}

	SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): datao=%u, payload_size=%u\n",
		      tcp_req, tqpair, tcp_req->datao, tcp_req->req->payload_size);

	if (c2h_data->datal > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
			    tcp_req, c2h_data->datal, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		goto end;
	}

	if (tcp_req->datao != c2h_data->datao) {
		SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datao(%u) != expected datao(%u)\n",
			    tcp_req, c2h_data->datao, tcp_req->datao);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
		goto end;
	}

	if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > payload_size(%u)\n",
			    tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
		goto end;
	}

	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
				  c2h_data->datao, c2h_data->datal);
	pdu->req = tcp_req;

	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
{
	struct nvme_tcp_req *tcp_req = cb_arg;

	assert(tcp_req != NULL);

	tcp_req->ordering.bits.send_ack = 1;
	if (tcp_req->r2tl_remain) {
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		assert(tcp_req->active_r2ts > 0);
		tcp_req->active_r2ts--;
		tcp_req->state = NVME_TCP_REQ_ACTIVE;

		if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) {
			tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0;
			SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req);
			assert(tcp_req->active_r2ts > 0);
			tcp_req->ttag = tcp_req->ttag_r2t_next;
			tcp_req->r2tl_remain = tcp_req->r2tl_remain_next;
			tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
			nvme_tcp_send_h2c_data(tcp_req);
			return;
		}

		/* We still need to call this to release the request's resources */
		nvme_tcp_req_complete_safe(tcp_req);
	}
}

static void
nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
	uint32_t plen, pdo, alignment;

	/* Reinit the send_ack and h2c_send_waiting_ack bits */
	tcp_req->ordering.bits.send_ack = 0;
	tcp_req->ordering.bits.h2c_send_waiting_ack = 0;
	rsp_pdu = tcp_req->pdu;
	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
	h2c_data = &rsp_pdu->hdr.h2c_data;

	h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
	plen = h2c_data->common.hlen = sizeof(*h2c_data);
	h2c_data->cccid = tcp_req->cid;
	h2c_data->ttag = tcp_req->ttag;
	h2c_data->datao = tcp_req->datao;

	h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
				  h2c_data->datao, h2c_data->datal);
	tcp_req->r2tl_remain -= h2c_data->datal;

	if (tqpair->flags.host_hdgst_enable) {
		h2c_data->common.flags |=
			SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	rsp_pdu->padding_len = 0;
	pdo = plen;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (alignment > plen) {
			rsp_pdu->padding_len = alignment - plen;
			pdo = plen = alignment;
		}
	}

	h2c_data->common.pdo = pdo;
	plen += h2c_data->datal;
	if (tqpair->flags.host_ddgst_enable) {
		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	h2c_data->common.plen = plen;
	tcp_req->datao += h2c_data->datal;
	if (!tcp_req->r2tl_remain) {
		h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
	}

	SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
		      h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);

	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
}

static void
nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
	uint32_t cid, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(nvme, "enter\n");
	cid = r2t->cccid;
	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
	if (!tcp_req) {
		SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
		goto end;
	}

	SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
		      tqpair);

	if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
		assert(tcp_req->active_r2ts == 0);
		tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
	}

	if (tcp_req->datao != r2t->r2to) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
		goto end;
	}

	if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
			    tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
		goto end;
	}

	tcp_req->active_r2ts++;
	if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
		if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
			/* We received a subsequent R2T while we are waiting for the H2C transfer to complete */
			SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
			assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
			tcp_req->ttag_r2t_next = r2t->ttag;
			tcp_req->r2tl_remain_next = r2t->r2tl;
			tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
			return;
		} else {
			fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
			SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t,
				    tqpair);
			goto end;
		}
	}

	tcp_req->ttag = r2t->ttag;
	tcp_req->r2tl_remain = r2t->r2tl;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	if (spdk_likely(tcp_req->ordering.bits.send_ack)) {
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		tcp_req->ordering.bits.h2c_send_waiting_ack = 1;
	}

	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static void
nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	struct nvme_tcp_pdu *pdu;
	int rc;
	uint32_t crc32c, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
	pdu = tqpair->recv_pdu;

	SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type);
	/* check header digest if needed */
	if (pdu->has_hdgst) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
		if (rc == 0) {
			SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
			nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
			return;
		}
	}

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
		nvme_tcp_icresp_handle(tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
		nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
		nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_R2T:
		nvme_tcp_r2t_hdr_handle(tqpair, pdu);
		break;

	default:
		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = 1;
		nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
		break;
	}
}

static int
nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu;
	uint32_t data_len;
	enum nvme_tcp_pdu_recv_state prev_state;

	/* The loop here is to allow for several back-to-back state changes.
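	 * For example, the common header, the PDU-specific header and the payload can
	 * all be consumed in a single call when the data is already available.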
*/ 1574 do { 1575 prev_state = tqpair->recv_state; 1576 switch (tqpair->recv_state) { 1577 /* If in a new state */ 1578 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1579 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1580 break; 1581 /* common header */ 1582 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1583 pdu = tqpair->recv_pdu; 1584 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1585 rc = nvme_tcp_read_data(tqpair->sock, 1586 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1587 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1588 if (rc < 0) { 1589 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1590 break; 1591 } 1592 pdu->ch_valid_bytes += rc; 1593 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1594 rc = NVME_TCP_PDU_IN_PROGRESS; 1595 goto out; 1596 } 1597 } 1598 1599 /* The command header of this PDU has now been read from the socket. */ 1600 nvme_tcp_pdu_ch_handle(tqpair); 1601 break; 1602 /* Wait for the pdu specific header */ 1603 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1604 pdu = tqpair->recv_pdu; 1605 rc = nvme_tcp_read_data(tqpair->sock, 1606 pdu->psh_len - pdu->psh_valid_bytes, 1607 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1608 if (rc < 0) { 1609 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1610 break; 1611 } 1612 1613 pdu->psh_valid_bytes += rc; 1614 if (pdu->psh_valid_bytes < pdu->psh_len) { 1615 rc = NVME_TCP_PDU_IN_PROGRESS; 1616 goto out; 1617 } 1618 1619 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1620 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1621 break; 1622 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1623 pdu = tqpair->recv_pdu; 1624 /* check whether the data is valid, if not we just return */ 1625 if (!pdu->data_len) { 1626 return NVME_TCP_PDU_IN_PROGRESS; 1627 } 1628 1629 data_len = pdu->data_len; 1630 /* data digest */ 1631 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1632 tqpair->flags.host_ddgst_enable)) { 1633 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1634 pdu->ddgst_enable = true; 1635 } 1636 1637 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1638 if (rc < 0) { 1639 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1640 break; 1641 } 1642 1643 pdu->rw_offset += rc; 1644 if (pdu->rw_offset < data_len) { 1645 rc = NVME_TCP_PDU_IN_PROGRESS; 1646 goto out; 1647 } 1648 1649 assert(pdu->rw_offset == data_len); 1650 /* All of this PDU has now been read from the socket. */ 1651 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1652 break; 1653 case NVME_TCP_PDU_RECV_STATE_ERROR: 1654 rc = NVME_TCP_PDU_FATAL; 1655 break; 1656 default: 1657 assert(0); 1658 break; 1659 } 1660 } while (prev_state != tqpair->recv_state); 1661 1662 out: 1663 *reaped += tqpair->async_complete; 1664 tqpair->async_complete = 0; 1665 1666 return rc; 1667 } 1668 1669 static void 1670 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1671 { 1672 uint64_t t02; 1673 struct nvme_tcp_req *tcp_req, *tmp; 1674 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1675 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1676 struct spdk_nvme_ctrlr_process *active_proc; 1677 1678 /* Don't check timeouts during controller initialization. 
*/ 1679 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1680 return; 1681 } 1682 1683 if (nvme_qpair_is_admin_queue(qpair)) { 1684 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1685 } else { 1686 active_proc = qpair->active_proc; 1687 } 1688 1689 /* Only check timeouts if the current process has a timeout callback. */ 1690 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1691 return; 1692 } 1693 1694 t02 = spdk_get_ticks(); 1695 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1696 assert(tcp_req->req != NULL); 1697 1698 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1699 /* 1700 * The requests are in order, so as soon as one has not timed out, 1701 * stop iterating. 1702 */ 1703 break; 1704 } 1705 } 1706 } 1707 1708 static int 1709 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1710 { 1711 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1712 uint32_t reaped; 1713 int rc; 1714 1715 if (qpair->poll_group == NULL) { 1716 rc = spdk_sock_flush(tqpair->sock); 1717 if (rc < 0) { 1718 return rc; 1719 } 1720 } 1721 1722 if (max_completions == 0) { 1723 max_completions = tqpair->num_entries; 1724 } else { 1725 max_completions = spdk_min(max_completions, tqpair->num_entries); 1726 } 1727 1728 reaped = 0; 1729 do { 1730 rc = nvme_tcp_read_pdu(tqpair, &reaped); 1731 if (rc < 0) { 1732 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1733 errno, spdk_strerror(errno)); 1734 goto fail; 1735 } else if (rc == 0) { 1736 /* Partial PDU is read */ 1737 break; 1738 } 1739 1740 } while (reaped < max_completions); 1741 1742 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1743 nvme_tcp_qpair_check_timeout(qpair); 1744 } 1745 1746 return reaped; 1747 fail: 1748 1749 /* 1750 * Since admin queues take the ctrlr_lock before entering this function, 1751 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need 1752 * to call the generic function which will take the lock for us. 
static void
nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
        struct spdk_nvme_qpair *qpair = ctx;
        struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
        int32_t num_completions;
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

        if (tqpair->needs_poll) {
                TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
                tqpair->needs_poll = false;
        }

        num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);

        if (pgroup->num_completions >= 0 && num_completions >= 0) {
                pgroup->num_completions += num_completions;
        } else {
                pgroup->num_completions = -ENXIO;
        }
}

static void
dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
}

static int
nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
{
        struct spdk_nvme_tcp_ic_req *ic_req;
        struct nvme_tcp_pdu *pdu;
        uint64_t icreq_timeout_tsc;
        int rc;

        pdu = tqpair->send_pdu;
        memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
        ic_req = &pdu->hdr.ic_req;

        ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
        ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
        ic_req->pfv = 0;
        ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
        ic_req->hpda = NVME_TCP_HPDA_DEFAULT;

        ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
        ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;

        nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);

        icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz());
        do {
                if (tqpair->qpair.poll_group) {
                        rc = (int)nvme_tcp_poll_group_process_completions(tqpair->qpair.poll_group, 0,
                                        dummy_disconnected_qpair_cb);
                } else {
                        rc = nvme_tcp_qpair_process_completions(&tqpair->qpair, 0);
                }
        } while ((tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) &&
                 (rc >= 0) && (spdk_get_ticks() <= icreq_timeout_tsc));

        if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
                SPDK_ERRLOG("Failed to construct the tqpair=%p: no valid icresp received\n", tqpair);
                return -1;
        }

        SPDK_DEBUGLOG(nvme, "Successfully constructed the tqpair=%p: received a valid icresp\n", tqpair);

        return 0;
}
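
/*
 * Resolve the target (and optional source) address from the transport ID and
 * open the qpair's TCP connection with spdk_sock_connect_ext(). Zero-copy is
 * requested for I/O qpairs only.
 */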
static int
nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
        struct sockaddr_storage dst_addr;
        struct sockaddr_storage src_addr;
        int rc;
        struct nvme_tcp_qpair *tqpair;
        int family;
        long int port;
        struct spdk_sock_opts opts;

        tqpair = nvme_tcp_qpair(qpair);

        switch (ctrlr->trid.adrfam) {
        case SPDK_NVMF_ADRFAM_IPV4:
                family = AF_INET;
                break;
        case SPDK_NVMF_ADRFAM_IPV6:
                family = AF_INET6;
                break;
        default:
                SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
                rc = -1;
                return rc;
        }

        SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

        memset(&dst_addr, 0, sizeof(dst_addr));

        SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
        rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
        if (rc != 0) {
                SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n");
                return rc;
        }

        if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
                memset(&src_addr, 0, sizeof(src_addr));
                rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
                if (rc != 0) {
                        SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n");
                        return rc;
                }
        }

        port = spdk_strtol(ctrlr->trid.trsvcid, 10);
        if (port <= 0 || port >= INT_MAX) {
                SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid);
                rc = -1;
                return rc;
        }

        opts.opts_size = sizeof(opts);
        spdk_sock_get_default_opts(&opts);
        opts.priority = ctrlr->trid.priority;
        opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
        tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts);
        if (!tqpair->sock) {
                SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
                            tqpair, ctrlr->trid.traddr, port);
                rc = -1;
                return rc;
        }

        return 0;
}

static int
nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
        int rc = 0;
        struct nvme_tcp_qpair *tqpair;

        tqpair = nvme_tcp_qpair(qpair);

        if (!tqpair->sock) {
                rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
                if (rc < 0) {
                        return rc;
                }
        }

        if (qpair->poll_group) {
                rc = nvme_poll_group_connect_qpair(qpair);
                if (rc) {
                        SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
                        return rc;
                }
        }

        tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
        /* Explicitly set the state and recv_state of tqpair */
        tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
        if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
                nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
        }
        rc = nvme_tcp_qpair_icreq_send(tqpair);
        if (rc != 0) {
                SPDK_ERRLOG("Unable to connect the tqpair\n");
                return rc;
        }

        rc = nvme_fabric_qpair_connect(&tqpair->qpair, tqpair->num_entries);
        if (rc < 0) {
                SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
                return rc;
        }

        return 0;
}
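
/*
 * Allocate a TCP qpair along with its request/PDU pools and its socket. The
 * socket is opened at creation time because spdk_nvme_qpair_get_optimal_poll_group()
 * needs socket information before the qpair is connected.
 */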
static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
                            uint16_t qid, uint32_t qsize,
                            enum spdk_nvme_qprio qprio,
                            uint32_t num_requests)
{
        struct nvme_tcp_qpair *tqpair;
        struct spdk_nvme_qpair *qpair;
        int rc;

        tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
        if (!tqpair) {
                SPDK_ERRLOG("failed to create tqpair\n");
                return NULL;
        }

        tqpair->num_entries = qsize;
        qpair = &tqpair->qpair;
        rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests);
        if (rc != 0) {
                free(tqpair);
                return NULL;
        }

        rc = nvme_tcp_alloc_reqs(tqpair);
        if (rc) {
                nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
                return NULL;
        }

        /* spdk_nvme_qpair_get_optimal_poll_group needs socket information,
         * so create the socket first when creating a qpair.
         */
        rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
        if (rc) {
                nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
                return NULL;
        }

        return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
                               const struct spdk_nvme_io_qpair_opts *opts)
{
        return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
                                           opts->io_queue_requests);
}

static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
                const struct spdk_nvme_ctrlr_opts *opts,
                void *devhandle)
{
        struct nvme_tcp_ctrlr *tctrlr;
        int rc;

        tctrlr = calloc(1, sizeof(*tctrlr));
        if (tctrlr == NULL) {
                SPDK_ERRLOG("could not allocate ctrlr\n");
                return NULL;
        }

        tctrlr->ctrlr.opts = *opts;
        tctrlr->ctrlr.trid = *trid;

        rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
        if (rc != 0) {
                free(tctrlr);
                return NULL;
        }

        tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
                               tctrlr->ctrlr.opts.admin_queue_size, 0,
                               tctrlr->ctrlr.opts.admin_queue_size);
        if (!tctrlr->ctrlr.adminq) {
                SPDK_ERRLOG("failed to create admin qpair\n");
                nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
                return NULL;
        }

        if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
                SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
                nvme_ctrlr_destruct(&tctrlr->ctrlr);
                return NULL;
        }

        return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
        /* TCP transport doesn't limit the maximum IO transfer size. */
        return UINT32_MAX;
}

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
        /*
         * We do not support >1 SGE in the initiator currently,
         * so we can only return 1 here. Once that support is
         * added, this should return ctrlr->cdata.nvmf_specific.msdbd
         * instead.
         */
        return 1;
}
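
/* Call iter_fn on every outstanding request of the qpair; stop at the first non-zero return value. */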
static int
nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
                                int (*iter_fn)(struct nvme_request *req, void *arg),
                                void *arg)
{
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
        struct nvme_tcp_req *tcp_req, *tmp;
        int rc;

        assert(iter_fn != NULL);

        TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
                assert(tcp_req->req != NULL);

                rc = iter_fn(tcp_req->req, arg);
                if (rc != 0) {
                        return rc;
                }
        }

        return 0;
}

static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
        struct nvme_tcp_req *tcp_req, *tmp;
        struct spdk_nvme_cpl cpl;
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

        cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
        cpl.status.sct = SPDK_NVME_SCT_GENERIC;

        TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
                assert(tcp_req->req != NULL);
                if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
                        continue;
                }

                nvme_tcp_req_complete(tcp_req, &cpl);
                nvme_tcp_req_put(tqpair, tcp_req);
        }
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_poll_group_create(void)
{
        struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));

        if (group == NULL) {
                SPDK_ERRLOG("Unable to allocate poll group.\n");
                return NULL;
        }

        TAILQ_INIT(&group->needs_poll);

        group->sock_group = spdk_sock_group_create(group);
        if (group->sock_group == NULL) {
                free(group);
                SPDK_ERRLOG("Unable to allocate sock group.\n");
                return NULL;
        }

        return &group->group;
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
{
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
        struct spdk_sock_group *group = NULL;
        int rc;

        rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
        if (!rc && group != NULL) {
                return spdk_sock_group_get_ctx(group);
        }

        return NULL;
}

static int
nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
        struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

        if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
                return -EPROTO;
        }
        return 0;
}

static int
nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
        struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

        if (tqpair->needs_poll) {
                TAILQ_REMOVE(&group->needs_poll, tqpair, link);
                tqpair->needs_poll = false;
        }

        if (tqpair->sock && group->sock_group) {
                if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
                        return -EPROTO;
                }
        }
        return 0;
}
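
/*
 * Register a qpair's socket with the poll group. Only qpairs that are at least
 * in the CONNECTED state have a socket to add; disconnected qpairs are accepted
 * without registering one.
 */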
static int
nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
                        struct spdk_nvme_qpair *qpair)
{
        struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
        struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

        /* disconnected qpairs won't have a sock to add. */
        if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
                if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
                        return -EPROTO;
                }
        }

        return 0;
}

static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
                           struct spdk_nvme_qpair *qpair)
{
        if (qpair->poll_group_tailq_head == &tgroup->connected_qpairs) {
                return nvme_poll_group_disconnect_qpair(qpair);
        }

        return 0;
}

static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
                                        uint32_t completions_per_qpair,
                                        spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
        struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
        struct spdk_nvme_qpair *qpair, *tmp_qpair;
        struct nvme_tcp_qpair *tqpair, *tmp_tqpair;

        group->completions_per_qpair = completions_per_qpair;
        group->num_completions = 0;

        spdk_sock_group_poll(group->sock_group);

        STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
                disconnected_qpair_cb(qpair, tgroup->group->ctx);
        }

        /* If any qpairs were marked as needing to be polled due to an asynchronous write completion
         * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
        TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
                nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
        }

        return group->num_completions;
}

static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
        int rc;
        struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

        if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
                return -EBUSY;
        }

        rc = spdk_sock_group_close(&group->sock_group);
        if (rc != 0) {
                SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
                assert(false);
        }

        free(tgroup);

        return 0;
}
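
/*
 * Transport operations table: this is how the generic NVMe transport layer
 * dispatches into the TCP transport. SPDK_NVME_TRANSPORT_REGISTER() below
 * registers it so that controllers whose transport ID uses trtype TCP are
 * serviced by the functions defined in this file.
 */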
const struct spdk_nvme_transport_ops tcp_ops = {
        .name = "TCP",
        .type = SPDK_NVME_TRANSPORT_TCP,
        .ctrlr_construct = nvme_tcp_ctrlr_construct,
        .ctrlr_scan = nvme_fabric_ctrlr_scan,
        .ctrlr_destruct = nvme_tcp_ctrlr_destruct,
        .ctrlr_enable = nvme_tcp_ctrlr_enable,

        .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
        .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
        .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
        .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,

        .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
        .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

        .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
        .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
        .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
        .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

        .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
        .qpair_reset = nvme_tcp_qpair_reset,
        .qpair_submit_request = nvme_tcp_qpair_submit_request,
        .qpair_process_completions = nvme_tcp_qpair_process_completions,
        .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
        .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

        .poll_group_create = nvme_tcp_poll_group_create,
        .qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
        .poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
        .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
        .poll_group_add = nvme_tcp_poll_group_add,
        .poll_group_remove = nvme_tcp_poll_group_remove,
        .poll_group_process_completions = nvme_tcp_poll_group_process_completions,
        .poll_group_destroy = nvme_tcp_poll_group_destroy,
};

SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);
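
/*
 * Illustrative host-side sketch (not part of this transport's implementation;
 * the address and NQN below are placeholders, and error handling/environment
 * setup are omitted). An application reaches the ops above simply by using a
 * transport ID with trtype TCP:
 *
 *     struct spdk_nvme_transport_id trid = {};
 *     struct spdk_nvme_ctrlr *ctrlr;
 *     struct spdk_nvme_qpair *qpair;
 *
 *     spdk_nvme_transport_id_parse(&trid,
 *             "trtype:TCP adrfam:IPv4 traddr:192.0.2.1 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1");
 *     ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *     qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
 *     ...
 *     spdk_nvme_qpair_process_completions(qpair, 0);
 *
 * These calls end up in nvme_tcp_ctrlr_construct(), nvme_tcp_ctrlr_connect_qpair()
 * and nvme_tcp_qpair_process_completions() via the tcp_ops table registered above.
 */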