/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NVMe/TCP transport
 */

#include "nvme_internal.h"

#include "spdk/endian.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/stdinc.h"
#include "spdk/crc32.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/trace.h"
#include "spdk/util.h"

#include "spdk_internal/nvme_tcp.h"

#define NVME_TCP_RW_BUFFER_SIZE 131072
#define NVME_TCP_TIME_OUT_IN_SECONDS 2

#define NVME_TCP_HPDA_DEFAULT			0
#define NVME_TCP_MAX_R2T_DEFAULT		1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE		4096
#define NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE	8192

/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;
};

struct nvme_tcp_poll_group {
	struct spdk_nvme_transport_poll_group group;
	struct spdk_sock_group *sock_group;
	uint32_t completions_per_qpair;
	int64_t num_completions;
};

/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
	struct spdk_nvme_qpair qpair;
	struct spdk_sock *sock;

	TAILQ_HEAD(, nvme_tcp_req) free_reqs;
	TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs;

	TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
	struct nvme_tcp_pdu recv_pdu;
	struct nvme_tcp_pdu send_pdu; /* only for error pdu and init pdu */
	enum nvme_tcp_pdu_recv_state recv_state;

	struct nvme_tcp_req *tcp_reqs;

	uint16_t num_entries;

	bool host_hdgst_enable;
	bool host_ddgst_enable;

	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
	uint32_t maxh2cdata;

	uint32_t maxr2t;

	/* 0-based value used to guide the padding */
	uint8_t cpda;

	enum nvme_tcp_qpair_state state;
};

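/*
 * Lifecycle of a TCP request: it starts FREE on the qpair's free list, becomes
 * ACTIVE once a command capsule has been submitted, and sits in ACTIVE_R2T
 * while the controller has outstanding R2Ts that the host is servicing with
 * H2C Data PDUs.
 */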
enum nvme_tcp_req_state {
	NVME_TCP_REQ_FREE,
	NVME_TCP_REQ_ACTIVE,
	NVME_TCP_REQ_ACTIVE_R2T,
};

struct nvme_tcp_req {
	struct nvme_request *req;
	enum nvme_tcp_req_state state;
	uint16_t cid;
	uint16_t ttag;
	uint32_t datao;
	uint32_t r2tl_remain;
	uint32_t active_r2ts;
	bool in_capsule_data;
	struct nvme_tcp_pdu send_pdu;
	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t iovcnt;
	TAILQ_ENTRY(nvme_tcp_req) link;
};

static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);

static inline struct nvme_tcp_qpair *
nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
{
	assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
}

static inline struct nvme_tcp_poll_group *
nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
{
	return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
}

static inline struct nvme_tcp_ctrlr *
nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP);
	return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
}

static struct nvme_tcp_req *
nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_req *tcp_req;

	tcp_req = TAILQ_FIRST(&tqpair->free_reqs);
	if (!tcp_req) {
		return NULL;
	}

	assert(tcp_req->state == NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_ACTIVE;
	TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
	tcp_req->datao = 0;
	tcp_req->req = NULL;
	tcp_req->in_capsule_data = false;
	tcp_req->r2tl_remain = 0;
	tcp_req->active_r2ts = 0;
	tcp_req->iovcnt = 0;
	memset(&tcp_req->send_pdu, 0, sizeof(tcp_req->send_pdu));
	TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);

	return tcp_req;
}

static void
nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	assert(tcp_req->state != NVME_TCP_REQ_FREE);
	tcp_req->state = NVME_TCP_REQ_FREE;
	TAILQ_REMOVE(&tqpair->outstanding_reqs, tcp_req, link);
	TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
}

static int
nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	ret = getaddrinfo(addr, service, &hints, &res);
	if (ret) {
		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
		return ret;
	}

	if (res->ai_addrlen > sizeof(*sa)) {
		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
		ret = EINVAL;
	} else {
		memcpy(sa, res->ai_addr, res->ai_addrlen);
	}

	freeaddrinfo(res);
	return ret;
}

static void
nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair)
{
	free(tqpair->tcp_reqs);
	tqpair->tcp_reqs = NULL;
}

static int
nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair)
{
	uint16_t i;
	struct nvme_tcp_req *tcp_req;

	tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req));
	if (tqpair->tcp_reqs == NULL) {
		SPDK_ERRLOG("Failed to allocate tcp_reqs\n");
		goto fail;
	}

	TAILQ_INIT(&tqpair->send_queue);
	TAILQ_INIT(&tqpair->free_reqs);
	TAILQ_INIT(&tqpair->outstanding_reqs);
	for (i = 0; i < tqpair->num_entries; i++) {
		tcp_req = &tqpair->tcp_reqs[i];
		tcp_req->cid = i;
		TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
	}

	return 0;
fail:
	nvme_tcp_free_reqs(tqpair);
	return -ENOMEM;
}

static void
nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_pdu *pdu;

	spdk_sock_close(&tqpair->sock);

	/* clear the send_queue */
	while (!TAILQ_EMPTY(&tqpair->send_queue)) {
		pdu = TAILQ_FIRST(&tqpair->send_queue);
		/* Remove the pdu from the send_queue so it is not sent by mistake
		 * on the next connection attempt.
		 */
		TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
	}
}

static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);

static int
nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;

	if (!qpair) {
		return -1;
	}

	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
	nvme_tcp_qpair_abort_reqs(qpair, 1);
	nvme_qpair_deinit(qpair);
	tqpair = nvme_tcp_qpair(qpair);
	nvme_tcp_free_reqs(tqpair);
	free(tqpair);

	return 0;
}

static int
nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	return 0;
}

static int
nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);

	if (ctrlr->adminq) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	free(tctrlr);

	return 0;
}

static void
_pdu_write_done(void *cb_arg, int err)
{
	struct nvme_tcp_pdu *pdu = cb_arg;
	struct nvme_tcp_qpair *tqpair = pdu->qpair;

	TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);

	if (err != 0) {
		nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
		return;
	}

	assert(pdu->cb_fn != NULL);
	pdu->cb_fn(pdu->cb_arg);
}

static int
nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
			 struct nvme_tcp_pdu *pdu,
			 nvme_tcp_qpair_xfer_complete_cb cb_fn,
			 void *cb_arg)
{
	int hlen;
	uint32_t crc32c;
	uint32_t mapped_length = 0;

	hlen = pdu->hdr.common.hlen;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
	}

	/* Data Digest */
	if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
	}

	pdu->cb_fn = cb_fn;
	pdu->cb_arg = cb_arg;

	pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu,
			       tqpair->host_hdgst_enable, tqpair->host_ddgst_enable,
			       &mapped_length);
	pdu->qpair = tqpair;
	pdu->sock_req.cb_fn = _pdu_write_done;
	pdu->sock_req.cb_arg = pdu;
	TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
	spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);

	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	struct nvme_request *req = tcp_req->req;

	tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset;
	tcp_req->iov[0].iov_len = req->payload_size;
	tcp_req->iovcnt = 1;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");

	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
	int rc;
	uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
	struct nvme_request *req = tcp_req->req;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
	remaining_size = req->payload_size;

	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
					      &length);
		if (rc) {
			return -1;
		}

		length = spdk_min(length, remaining_size);
		tcp_req->iov[iovcnt].iov_len = length;
		remaining_size -= length;
		iovcnt++;
	} while (remaining_size > 0 && iovcnt < max_num_sgl);

	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
	if (remaining_size > 0) {
		SPDK_ERRLOG("Failed to construct tcp_req=%p: iovcnt=%u, remaining_size=%u\n",
			    tcp_req, iovcnt, remaining_size);
		return -1;
	}

	tcp_req->iovcnt = iovcnt;

	return 0;
}

static int
nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
		  struct nvme_tcp_req *tcp_req)
{
	struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr;
	int rc = 0;
	enum spdk_nvme_data_transfer xfer;
	uint32_t max_incapsule_data_size;

	tcp_req->req = req;
	req->cmd.cid = tcp_req->cid;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT;
	req->cmd.dptr.sgl1.unkeyed.length = req->payload_size;

	if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
		rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
	} else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
		rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
	} else {
		rc = -1;
	}

	if (rc) {
		return rc;
	}

	if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) {
		struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;

		xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
	} else {
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
	}
	if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		max_incapsule_data_size = ctrlr->ioccsz_bytes;
		if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) {
			max_incapsule_data_size = spdk_min(max_incapsule_data_size, NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		}

		if (req->payload_size <= max_incapsule_data_size) {
			req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
			req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
			req->cmd.dptr.sgl1.address = 0;
			tcp_req->in_capsule_data = true;
		}
	}

	return 0;
}

static void
nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
{
}

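/*
 * Build and queue a CapsuleCmd PDU for tcp_req. For write commands small
 * enough to be carried in-capsule, the payload is attached to the PDU and the
 * PDO field is advanced to honor the controller's CPDA alignment; header and
 * data digests are appended when they were negotiated during ICReq/ICResp.
 */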
static int
nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair,
				struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_pdu *pdu;
	struct spdk_nvme_tcp_cmd *capsule_cmd;
	uint32_t plen = 0, alignment;
	uint8_t pdo;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
	pdu = &tcp_req->send_pdu;

	capsule_cmd = &pdu->hdr.capsule_cmd;
	capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD;
	plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd);
	capsule_cmd->ccsqe = tcp_req->req->cmd;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair);

	if (tqpair->host_hdgst_enable) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Header digest is enabled for capsule command on tcp_req=%p\n",
			      tcp_req);
		capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) {
		goto end;
	}

	pdo = plen;
	pdu->padding_len = 0;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (alignment > plen) {
			pdu->padding_len = alignment - plen;
			pdo = alignment;
			plen = alignment;
		}
	}

	capsule_cmd->common.pdo = pdo;
	plen += tcp_req->req->payload_size;
	if (tqpair->host_ddgst_enable) {
		capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	tcp_req->datao = 0;
	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
				  0, tcp_req->req->payload_size);
end:
	capsule_cmd->common.plen = plen;
	return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, NULL);
}

static int
nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair,
			      struct nvme_request *req)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_req *tcp_req;

	tqpair = nvme_tcp_qpair(qpair);
	assert(tqpair != NULL);
	assert(req != NULL);

	tcp_req = nvme_tcp_req_get(tqpair);
	if (!tcp_req) {
		/* Inform the upper layer to try again later. */
		return -EAGAIN;
	}

	if (nvme_tcp_req_init(tqpair, req, tcp_req)) {
		SPDK_ERRLOG("nvme_tcp_req_init() failed\n");
		nvme_tcp_req_put(tqpair, tcp_req);
		return -1;
	}

	return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req);
}

static int
nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	return 0;
}

static void
nvme_tcp_req_complete(struct nvme_request *req,
		      struct spdk_nvme_cpl *rsp)
{
	nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp);
	nvme_free_request(req);
}

static void
nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct nvme_request *req;
	struct spdk_nvme_cpl cpl;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	cpl.status.dnr = dnr;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);
		req = tcp_req->req;

		nvme_tcp_req_complete(req, &cpl);
		nvme_tcp_req_put(tqpair, tcp_req);
	}
}

static void
nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair,
			      enum nvme_tcp_pdu_recv_state state)
{
	if (tqpair->recv_state == state) {
		SPDK_ERRLOG("The recv state of tqpair=%p is the same as the state(%d) to be set\n",
			    tqpair, state);
		return;
	}

	tqpair->recv_state = state;
	switch (state) {
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
	case NVME_TCP_PDU_RECV_STATE_ERROR:
		memset(&tqpair->recv_pdu, 0, sizeof(struct nvme_tcp_pdu));
		break;
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
	case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
	default:
		break;
	}
}

static void
nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg)
{
	struct nvme_tcp_qpair *tqpair = cb_arg;

	tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
}

static void
nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
				 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
{
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_term_req_hdr *h2c_term_req;
	uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req);
	uint8_t copy_len;

	rsp_pdu = &tqpair->send_pdu;
	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
	h2c_term_req = &rsp_pdu->hdr.term_req;
	h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
	h2c_term_req->common.hlen = h2c_term_req_hdr_len;

	if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		DSET32(&h2c_term_req->fei, error_offset);
	}

	copy_len = pdu->hdr.common.hlen;
	if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
		copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
	}

	/* Copy the error info into the buffer */
	memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len);
	nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len);

	/* Include the header length of the PDU that was received in error */
	h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, NULL);
}

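/*
 * Validate the common header of the PDU that was just received: the first PDU
 * on a connection must be an IC_RESP, every later PDU type must match the
 * expected header and total length, and any violation is answered with an H2C
 * Termination Request before the receive state machine moves on.
 */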
static void
nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
{
	struct nvme_tcp_pdu *pdu;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	uint32_t expected_hlen, hd_len = 0;
	bool plen_error = false;

	pdu = &tqpair->recv_pdu;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "pdu type = %d\n", pdu->hdr.common.pdu_type);
	if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) {
		if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
			SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
			goto err;
		}
		expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp);
		if (pdu->hdr.common.plen != expected_hlen) {
			plen_error = true;
		}
	} else {
		if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
			SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n");
			fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
			goto err;
		}

		switch (pdu->hdr.common.pdu_type) {
		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
			expected_hlen = sizeof(struct spdk_nvme_tcp_rsp);
			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
			}

			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
			expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
			if (pdu->hdr.common.plen < pdu->hdr.common.pdo) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
			expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
			if ((pdu->hdr.common.plen <= expected_hlen) ||
			    (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
				plen_error = true;
			}
			break;
		case SPDK_NVME_TCP_PDU_TYPE_R2T:
			expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr);
			if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
				hd_len = SPDK_NVME_TCP_DIGEST_LEN;
			}

			if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
				plen_error = true;
			}
			break;

		default:
			SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu.hdr.common.pdu_type);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
			error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
			goto err;
		}
	}

	if (pdu->hdr.common.hlen != expected_hlen) {
		SPDK_ERRLOG("Expected PDU header length %u, got %u\n",
			    expected_hlen, pdu->hdr.common.hlen);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
		goto err;
	} else if (plen_error) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
		goto err;
	} else {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
		nvme_tcp_pdu_calc_psh_len(&tqpair->recv_pdu, tqpair->host_hdgst_enable);
		return;
	}
err:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
}

static struct nvme_tcp_req *
get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid)
{
	assert(tqpair != NULL);
	if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) {
		return NULL;
	}

	return &tqpair->tcp_reqs[cid];
}

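/*
 * A C2H Data PDU payload has been fully received. If the controller set the
 * SUCCESS flag, the data transfer doubles as the completion: a completion
 * entry is synthesized here and no separate CapsuleResp PDU will follow for
 * this command.
 */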
static void
nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu, uint32_t *reaped)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
	struct spdk_nvme_cpl cpl = {};
	uint8_t flags;

	tcp_req = pdu->req;
	assert(tcp_req != NULL);

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
	c2h_data = &pdu->hdr.c2h_data;
	tcp_req->datao += pdu->data_len;
	flags = c2h_data->common.flags;

	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
		if (tcp_req->datao == tcp_req->req->payload_size) {
			cpl.status.p = 0;
		} else {
			cpl.status.p = 1;
		}

		cpl.cid = tcp_req->cid;
		cpl.sqid = tqpair->qpair.id;
		nvme_tcp_req_complete(tcp_req->req, &cpl);
		nvme_tcp_req_put(tqpair, tcp_req);
		(*reaped)++;
	}
}

static const char *spdk_nvme_tcp_term_req_fes_str[] = {
	"Invalid PDU Header Field",
	"PDU Sequence Error",
	"Header Digest Error",
	"Data Transfer Out of Range",
	"Data Transfer Limit Exceeded",
	"Unsupported parameter",
};

static void
nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req)
{
	SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req,
		    spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]);
	if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
	    (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "The offset from the start of the PDU header is %u\n",
			      DGET32(c2h_term_req->fei));
	}
	/* we may also need to dump some other info here */
}

static void
nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
				     struct nvme_tcp_pdu *pdu)
{
	nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req);
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
}

static void
nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
			    uint32_t *reaped)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu;
	uint32_t crc32c, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	pdu = &tqpair->recv_pdu;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");

	/* check data digest if needed */
	if (pdu->ddgst_enable) {
		crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
		rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
		if (rc == 0) {
			SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
			nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
			return;
		}
	}

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
		nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu);
		break;

	default:
		/* The code should not reach here */
		SPDK_ERRLOG("The code should not reach here\n");
		break;
	}
}

static void
nvme_tcp_send_icreq_complete(void *cb_arg)
{
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Complete the icreq send for tqpair=%p\n",
		      (struct nvme_tcp_qpair *)cb_arg);
}

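/*
 * Handle the ICResp PDU that answers our ICReq. The controller's maxh2cdata,
 * CPDA and digest choices are recorded on the qpair, the socket receive buffer
 * is sized accordingly, and the qpair is moved to the RUNNING state so command
 * capsules may be sent.
 */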
static void
nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
		       struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	int recv_buf_size;

	/* Only PFV 0 is defined currently */
	if (ic_resp->pfv != 0) {
		SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
		goto end;
	}

	if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
		SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
			    ic_resp->maxh2cdata);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
		goto end;
	}
	tqpair->maxh2cdata = ic_resp->maxh2cdata;

	if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
		SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
		goto end;
	}
	tqpair->cpda = ic_resp->cpda;

	tqpair->host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
	tqpair->host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);

	/* Now that we know whether digests are enabled, properly size the receive buffer to
	 * handle 4 incoming 4K read commands. */
	recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_cmd);

	if (tqpair->host_hdgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (tqpair->host_ddgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * 4) < 0) {
		SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
			     tqpair,
			     recv_buf_size);
		/* Not fatal. */
	}

	tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	return;
end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
	return;
}

static void
nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
				 uint32_t *reaped)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp;
	uint32_t cid, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	struct spdk_nvme_cpl cpl;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
	cpl = capsule_resp->rccqe;
	cid = cpl.cid;

	/* Recv the pdu again */
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
	if (!tcp_req) {
		SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe);
		goto end;
	}

	assert(tcp_req->req != NULL);
	assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
	nvme_tcp_req_complete(tcp_req->req, &cpl);
	nvme_tcp_req_put(tqpair, tcp_req);
	(*reaped)++;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);

	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
	return;
}

static void
nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
				 struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
		SPDK_ERRLOG("Fatal Error Status (FES) is unknown for c2h_term_req pdu=%p\n", pdu);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
		goto end;
	}

	/* set the data buffer */
	nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
			      c2h_term_req->common.plen - c2h_term_req->common.hlen);
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;
end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
	return;
}

static void
nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
		      tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
	tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
	if (!tcp_req) {
		SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
		goto end;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "tcp_req(%p) on tqpair(%p): datao=%u, payload_size=%u\n",
		      tcp_req, tqpair, tcp_req->datao, tcp_req->req->payload_size);

	if (c2h_data->datal > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
			    tcp_req, c2h_data->datal, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		goto end;
	}

	if (tcp_req->datao != c2h_data->datao) {
		SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datao(%u) != expected datao(%u) in tcp_req\n",
			    tcp_req, c2h_data->datao, tcp_req->datao);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
		goto end;
	}

	if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > payload_size(%u) in tcp_req\n",
			    tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
		goto end;
	}

	nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
				  c2h_data->datao, c2h_data->datal);
	pdu->req = tcp_req;

	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
	return;
}

static void
nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
{
	struct nvme_tcp_req *tcp_req = cb_arg;

	assert(tcp_req != NULL);

	if (tcp_req->r2tl_remain) {
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		assert(tcp_req->active_r2ts > 0);
		tcp_req->active_r2ts--;
		tcp_req->state = NVME_TCP_REQ_ACTIVE;
	}
}

static void
nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
	struct nvme_tcp_pdu *rsp_pdu;
	struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
	uint32_t plen, pdo, alignment;

	rsp_pdu = &tcp_req->send_pdu;
	memset(rsp_pdu, 0, sizeof(*rsp_pdu));
	h2c_data = &rsp_pdu->hdr.h2c_data;

	h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
	plen = h2c_data->common.hlen = sizeof(*h2c_data);
	h2c_data->cccid = tcp_req->cid;
	h2c_data->ttag = tcp_req->ttag;
	h2c_data->datao = tcp_req->datao;

	h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
	nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
				  h2c_data->datao, h2c_data->datal);
	tcp_req->r2tl_remain -= h2c_data->datal;

	if (tqpair->host_hdgst_enable) {
		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	rsp_pdu->padding_len = 0;
	pdo = plen;
	if (tqpair->cpda) {
		alignment = (tqpair->cpda + 1) << 2;
		if (alignment > plen) {
			rsp_pdu->padding_len = alignment - plen;
			pdo = plen = alignment;
		}
	}

	h2c_data->common.pdo = pdo;
	plen += h2c_data->datal;
	if (tqpair->host_ddgst_enable) {
		h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
		plen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	h2c_data->common.plen = plen;
	tcp_req->datao += h2c_data->datal;
	if (!tcp_req->r2tl_remain) {
		h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
		      h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);

	nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
}

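/*
 * Handle an R2T PDU: after validating the R2T bookkeeping, offset and length
 * against the original request, remember the transfer tag and remaining
 * length and start streaming H2C Data PDUs (in maxh2cdata-sized chunks) via
 * nvme_tcp_send_h2c_data().
 */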
static void
nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
{
	struct nvme_tcp_req *tcp_req;
	struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
	uint32_t cid, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
	cid = r2t->cccid;
	tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
	if (!tcp_req) {
		SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
		goto end;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
		      tqpair);

	if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
		assert(tcp_req->active_r2ts == 0);
		tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
	}

	tcp_req->active_r2ts++;
	if (tcp_req->active_r2ts > tqpair->maxr2t) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
		SPDK_ERRLOG("Invalid R2T: it exceeds the R2T maximum=%u for tqpair=%p\n", tqpair->maxr2t, tqpair);
		goto end;
	}

	if (tcp_req->datao != r2t->r2to) {
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
		goto end;
	}

	if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
		SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
			    tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
		error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
		goto end;
	}

	tcp_req->ttag = r2t->ttag;
	tcp_req->r2tl_remain = r2t->r2tl;
	nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);

	nvme_tcp_send_h2c_data(tcp_req);
	return;

end:
	nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
	return;
}

static void
nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	struct nvme_tcp_pdu *pdu;
	int rc;
	uint32_t crc32c, error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;

	assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
	pdu = &tqpair->recv_pdu;

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type);
	/* check header digest if needed */
	if (pdu->has_hdgst) {
		crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
		rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
		if (rc == 0) {
			SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
			fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
			nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
			return;
		}
	}

	switch (pdu->hdr.common.pdu_type) {
	case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
		nvme_tcp_icresp_handle(tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
		nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
		nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
		break;

	case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
		break;
	case SPDK_NVME_TCP_PDU_TYPE_R2T:
		nvme_tcp_r2t_hdr_handle(tqpair, pdu);
		break;

	default:
		SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu.hdr.common.pdu_type);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = 1;
		nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
		break;
	}
}

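/*
 * Drive the per-qpair receive state machine: READY -> CH (common header) ->
 * PSH (PDU-specific header plus header digest) -> PAYLOAD, looping while the
 * state keeps changing. A short read leaves the state untouched and returns
 * NVME_TCP_PDU_IN_PROGRESS so the next poll can resume where this one stopped.
 */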
static int
nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
{
	int rc = 0;
	struct nvme_tcp_pdu *pdu;
	uint32_t data_len;
	enum nvme_tcp_pdu_recv_state prev_state;

	/* The loop here is to allow for several back-to-back state changes. */
	do {
		prev_state = tqpair->recv_state;
		switch (tqpair->recv_state) {
		/* If in a new state */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
			nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
			break;
		/* common header */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
			pdu = &tqpair->recv_pdu;
			if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
				rc = nvme_tcp_read_data(tqpair->sock,
							sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
							(uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
				if (rc < 0) {
					nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
					break;
				}
				pdu->ch_valid_bytes += rc;
				if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
					return NVME_TCP_PDU_IN_PROGRESS;
				}
			}

			/* The common header of this PDU has now been read from the socket. */
			nvme_tcp_pdu_ch_handle(tqpair);
			break;
		/* Wait for the pdu specific header */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
			pdu = &tqpair->recv_pdu;
			rc = nvme_tcp_read_data(tqpair->sock,
						pdu->psh_len - pdu->psh_valid_bytes,
						(uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
			if (rc < 0) {
				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
				break;
			}

			pdu->psh_valid_bytes += rc;
			if (pdu->psh_valid_bytes < pdu->psh_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			/* The full header (CH, PSH and header digest) of this PDU has now been read from the socket. */
			nvme_tcp_pdu_psh_handle(tqpair, reaped);
			break;
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
			pdu = &tqpair->recv_pdu;
			/* check whether the data is valid, if not we just return */
			if (!pdu->data_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			data_len = pdu->data_len;
			/* data digest */
			if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
					  tqpair->host_ddgst_enable)) {
				data_len += SPDK_NVME_TCP_DIGEST_LEN;
				pdu->ddgst_enable = true;
			}

			rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
			if (rc < 0) {
				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
				break;
			}

			pdu->readv_offset += rc;
			if (pdu->readv_offset < data_len) {
				return NVME_TCP_PDU_IN_PROGRESS;
			}

			assert(pdu->readv_offset == data_len);
			/* All of this PDU has now been read from the socket. */
			nvme_tcp_pdu_payload_handle(tqpair, reaped);
			break;
		case NVME_TCP_PDU_RECV_STATE_ERROR:
			rc = NVME_TCP_PDU_FATAL;
			break;
		default:
			assert(0);
			break;
		}
	} while (prev_state != tqpair->recv_state);

	return rc;
}

static void
nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
{
	uint64_t t02;
	struct nvme_tcp_req *tcp_req, *tmp;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_ctrlr_process *active_proc;

	/* Don't check timeouts during controller initialization. */
	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	if (nvme_qpair_is_admin_queue(qpair)) {
		active_proc = nvme_ctrlr_get_current_process(ctrlr);
	} else {
		active_proc = qpair->active_proc;
	}

	/* Only check timeouts if the current process has a timeout callback. */
	if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
		return;
	}

	t02 = spdk_get_ticks();
	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);

		if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) {
			/*
			 * The requests are in order, so as soon as one has not timed out,
			 * stop iterating.
			 */
			break;
		}
	}
}

static int
nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	uint32_t reaped;
	int rc;

	rc = spdk_sock_flush(tqpair->sock);
	if (rc < 0) {
		return rc;
	}

	if (max_completions == 0) {
		max_completions = tqpair->num_entries;
	} else {
		max_completions = spdk_min(max_completions, tqpair->num_entries);
	}

	reaped = 0;
	do {
		rc = nvme_tcp_read_pdu(tqpair, &reaped);
		if (rc < 0) {
			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Error polling CQ! (%d): %s\n",
				      errno, spdk_strerror(errno));
			goto fail;
		} else if (rc == 0) {
			/* Partial PDU is read */
			break;
		}

	} while (reaped < max_completions);

	if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
		nvme_tcp_qpair_check_timeout(qpair);
	}

	return reaped;
fail:

	/*
	 * Since admin queues take the ctrlr_lock before entering this function,
	 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
	 * to call the generic function which will take the lock for us.
	 */
	qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
	} else {
		nvme_ctrlr_disconnect_qpair(qpair);
	}
	return -ENXIO;
}

static void
nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvme_qpair *qpair = ctx;
	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
	int32_t num_completions;

	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);

	if (pgroup->num_completions >= 0 && num_completions >= 0) {
		pgroup->num_completions += num_completions;
	} else {
		pgroup->num_completions = -ENXIO;
	}
}

static int
nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
{
	struct spdk_nvme_tcp_ic_req *ic_req;
	struct nvme_tcp_pdu *pdu;
	uint64_t icreq_timeout_tsc;
	int rc;

	pdu = &tqpair->send_pdu;
	memset(&tqpair->send_pdu, 0, sizeof(tqpair->send_pdu));
	ic_req = &pdu->hdr.ic_req;

	ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
	ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
	ic_req->pfv = 0;
	ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
	ic_req->hpda = NVME_TCP_HPDA_DEFAULT;

	ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
	ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;

	nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);

	icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz());
	do {
		rc = nvme_tcp_qpair_process_completions(&tqpair->qpair, 0);
	} while ((tqpair->state == NVME_TCP_QPAIR_STATE_INVALID) &&
		 (rc == 0) && (spdk_get_ticks() <= icreq_timeout_tsc));

	if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
		SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair);
		return -1;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Successfully constructed the tqpair=%p via correct icresp\n", tqpair);

	return 0;
}

static int
nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	int rc;
	struct nvme_tcp_qpair *tqpair;
	int family;
	long int port;
	struct spdk_sock_opts opts;

	tqpair = nvme_tcp_qpair(qpair);

	switch (ctrlr->trid.adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		family = AF_INET;
		break;
	case SPDK_NVMF_ADRFAM_IPV6:
		family = AF_INET6;
		break;
	default:
		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
		return -1;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

	memset(&dst_addr, 0, sizeof(dst_addr));

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "trsvcid is %s\n", ctrlr->trid.trsvcid);
	rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
	if (rc != 0) {
		SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n");
		return -1;
	}

	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
		memset(&src_addr, 0, sizeof(src_addr));
		rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
		if (rc != 0) {
			SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n");
			return -1;
		}
	}

	port = spdk_strtol(ctrlr->trid.trsvcid, 10);
	if (port <= 0 || port >= INT_MAX) {
		SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid);
		return -1;
	}

	opts.opts_size = sizeof(opts);
	spdk_sock_get_default_opts(&opts);
	opts.priority = ctrlr->trid.priority;
	tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts);
	if (!tqpair->sock) {
		SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
			    tqpair, ctrlr->trid.traddr, port);
		return -1;
	}

	tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
	/* Explicitly set the state and recv_state of tqpair */
	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
	if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
	}
	rc = nvme_tcp_qpair_icreq_send(tqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect the tqpair\n");
		return -1;
	}

	rc = nvme_fabric_qpair_connect(&tqpair->qpair, tqpair->num_entries);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
		return -1;
	}

	return 0;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
			    uint16_t qid, uint32_t qsize,
			    enum spdk_nvme_qprio qprio,
			    uint32_t num_requests)
{
	struct nvme_tcp_qpair *tqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
	if (!tqpair) {
		SPDK_ERRLOG("failed to allocate tqpair\n");
		return NULL;
	}

	tqpair->num_entries = qsize;
	qpair = &tqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests);
	if (rc != 0) {
		free(tqpair);
		return NULL;
	}

	rc = nvme_tcp_alloc_reqs(tqpair);
	if (rc) {
		nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;
	}

	return qpair;
}

static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
			       const struct spdk_nvme_io_qpair_opts *opts)
{
	return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
					   opts->io_queue_requests);
}

static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
		const struct spdk_nvme_ctrlr_opts *opts,
		void *devhandle)
{
	struct nvme_tcp_ctrlr *tctrlr;
	union spdk_nvme_cap_register cap;
	union spdk_nvme_vs_register vs;
	int rc;

	tctrlr = calloc(1, sizeof(*tctrlr));
	if (tctrlr == NULL) {
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	tctrlr->ctrlr.opts = *opts;
	tctrlr->ctrlr.trid = *trid;

	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
	if (rc != 0) {
		free(tctrlr);
		return NULL;
	}

	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, 0,
			       tctrlr->ctrlr.opts.admin_queue_size);
	if (!tctrlr->ctrlr.adminq) {
		SPDK_ERRLOG("failed to create admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	rc = nvme_transport_ctrlr_connect_qpair(&tctrlr->ctrlr, tctrlr->ctrlr.adminq);
	if (rc < 0) {
		SPDK_ERRLOG("failed to connect admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_get_cap(&tctrlr->ctrlr, &cap)) {
		SPDK_ERRLOG("get_cap() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_get_vs(&tctrlr->ctrlr, &vs)) {
		SPDK_ERRLOG("get_vs() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	nvme_ctrlr_init_cap(&tctrlr->ctrlr, &cap, &vs);

	return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/* TCP transport doesn't limit maximum IO transfer size. */
	return UINT32_MAX;
}

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * We do not support >1 SGE in the initiator currently,
	 * so we can only return 1 here. Once that support is
	 * added, this should return ctrlr->cdata.nvmf_specific.msdbd
	 * instead.
	 */
	return 1;
}

static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct nvme_request *req;
	struct spdk_nvme_cpl cpl;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);
		req = tcp_req->req;
		if (req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			continue;
		}

		nvme_tcp_req_complete(req, &cpl);
		nvme_tcp_req_put(tqpair, tcp_req);
	}
}

static struct spdk_nvme_transport_poll_group *
nvme_tcp_poll_group_create(void)
{
	struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));

	if (group == NULL) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	group->sock_group = spdk_sock_group_create(group);
	if (group->sock_group == NULL) {
		free(group);
		SPDK_ERRLOG("Unable to allocate sock group.\n");
		return NULL;
	}

	return &group->group;
}

static int
nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
		return -EPROTO;
	}
	return 0;
}

static int
nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->sock && group->sock_group) {
		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
			return -EPROTO;
		}
	}
	return 0;
}

static int
nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	/* disconnected qpairs won't have a sock to add. */
	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
			return -EPROTO;
		}
	}

	return 0;
}

static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			   struct spdk_nvme_qpair *qpair)
{
	if (qpair->poll_group_tailq_head == &tgroup->connected_qpairs) {
		return nvme_poll_group_disconnect_qpair(qpair);
	}

	return 0;
}

static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
	struct spdk_nvme_qpair *qpair, *tmp_qpair;

	group->completions_per_qpair = completions_per_qpair;
	group->num_completions = 0;

	spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	return group->num_completions;
}

static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	int rc;
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	rc = spdk_sock_group_close(&group->sock_group);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
		assert(false);
	}

	free(tgroup);

	return 0;
}

const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.ctrlr_construct = nvme_tcp_ctrlr_construct,
	.ctrlr_scan = nvme_fabric_ctrlr_scan,
	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
	.ctrlr_enable = nvme_tcp_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,

	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
	.qpair_process_completions = nvme_tcp_qpair_process_completions,
	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

	.poll_group_create = nvme_tcp_poll_group_create,
	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
	.poll_group_add = nvme_tcp_poll_group_add,
	.poll_group_remove = nvme_tcp_poll_group_remove,
	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
	.poll_group_destroy = nvme_tcp_poll_group_destroy,
};

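/*
 * Register the transport with the generic NVMe driver. Applications do not
 * call into this file directly; they select it through the transport ID.
 * A rough sketch of how a host might connect over TCP (the address, service
 * and subsystem NQN below are placeholder example values):
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_TCP);
 *	trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;
 *	snprintf(trid.traddr, sizeof(trid.traddr), "192.168.1.10");
 *	snprintf(trid.trsvcid, sizeof(trid.trsvcid), "4420");
 *	snprintf(trid.subnqn, sizeof(trid.subnqn), "nqn.2016-06.io.spdk:cnode1");
 *	struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 */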
SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);