1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * NVMe/TCP transport 36 */ 37 38 #include "nvme_internal.h" 39 40 #include "spdk/endian.h" 41 #include "spdk/likely.h" 42 #include "spdk/string.h" 43 #include "spdk/stdinc.h" 44 #include "spdk/crc32.h" 45 #include "spdk/endian.h" 46 #include "spdk/assert.h" 47 #include "spdk/string.h" 48 #include "spdk/thread.h" 49 #include "spdk/trace.h" 50 #include "spdk/util.h" 51 52 #include "spdk_internal/nvme_tcp.h" 53 54 #define NVME_TCP_RW_BUFFER_SIZE 131072 55 #define NVME_TCP_TIME_OUT_IN_SECONDS 2 56 57 #define NVME_TCP_HPDA_DEFAULT 0 58 #define NVME_TCP_MAX_R2T_DEFAULT 1 59 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 60 61 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 62 struct nvme_tcp_ctrlr { 63 struct spdk_nvme_ctrlr ctrlr; 64 }; 65 66 struct nvme_tcp_poll_group { 67 struct spdk_nvme_transport_poll_group group; 68 struct spdk_sock_group *sock_group; 69 uint32_t completions_per_qpair; 70 int64_t num_completions; 71 }; 72 73 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 74 struct nvme_tcp_qpair { 75 struct spdk_nvme_qpair qpair; 76 struct spdk_sock *sock; 77 78 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 79 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 80 81 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 82 struct nvme_tcp_pdu recv_pdu; 83 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 84 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 85 enum nvme_tcp_pdu_recv_state recv_state; 86 87 struct nvme_tcp_req *tcp_reqs; 88 89 uint16_t num_entries; 90 uint16_t async_complete; 91 92 struct { 93 uint16_t host_hdgst_enable: 1; 94 uint16_t host_ddgst_enable: 1; 95 uint16_t icreq_send_ack: 1; 96 uint16_t reserved: 13; 97 } flags; 98 99 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 100 uint32_t maxh2cdata; 101 102 uint32_t maxr2t; 103 104 /* 0 based value, 
which is used to guide the padding */ 105 uint8_t cpda; 106 107 enum nvme_tcp_qpair_state state; 108 }; 109 110 enum nvme_tcp_req_state { 111 NVME_TCP_REQ_FREE, 112 NVME_TCP_REQ_ACTIVE, 113 NVME_TCP_REQ_ACTIVE_R2T, 114 }; 115 116 struct nvme_tcp_req { 117 struct nvme_request *req; 118 enum nvme_tcp_req_state state; 119 uint16_t cid; 120 uint16_t ttag; 121 uint32_t datao; 122 uint32_t r2tl_remain; 123 uint32_t active_r2ts; 124 /* Used to hold a value received from subsequent R2T while we are still 125 * waiting for H2C complete */ 126 uint16_t ttag_r2t_next; 127 bool in_capsule_data; 128 /* It is used to track whether the req can be safely freed */ 129 union { 130 uint8_t raw; 131 struct { 132 /* The last send operation completed - kernel released send buffer */ 133 uint8_t send_ack : 1; 134 /* Data transfer completed - target send resp or last data bit */ 135 uint8_t data_recv : 1; 136 /* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification 137 * from kernel) to send H2C */ 138 uint8_t h2c_send_waiting_ack : 1; 139 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 140 * Rare case, actual when dealing with target that can send several R2T requests. 141 * SPDK TCP target sends 1 R2T for the whole data buffer */ 142 uint8_t r2t_waiting_h2c_complete : 1; 143 uint8_t reserved : 4; 144 } bits; 145 } ordering; 146 struct nvme_tcp_pdu *send_pdu; 147 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 148 uint32_t iovcnt; 149 /* Used to hold a value received from subsequent R2T while we are still 150 * waiting for H2C ack */ 151 uint32_t r2tl_remain_next; 152 struct nvme_tcp_qpair *tqpair; 153 TAILQ_ENTRY(nvme_tcp_req) link; 154 struct spdk_nvme_cpl rsp; 155 }; 156 157 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 158 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 159 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 160 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 161 162 static inline struct nvme_tcp_qpair * 163 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 164 { 165 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 166 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 167 } 168 169 static inline struct nvme_tcp_poll_group * 170 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 171 { 172 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 173 } 174 175 static inline struct nvme_tcp_ctrlr * 176 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 177 { 178 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 179 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 180 } 181 182 static struct nvme_tcp_req * 183 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 184 { 185 struct nvme_tcp_req *tcp_req; 186 187 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 188 if (!tcp_req) { 189 return NULL; 190 } 191 192 assert(tcp_req->state == NVME_TCP_REQ_FREE); 193 tcp_req->state = NVME_TCP_REQ_ACTIVE; 194 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 195 tcp_req->datao = 0; 196 tcp_req->req = NULL; 197 tcp_req->in_capsule_data = false; 198 tcp_req->r2tl_remain = 0; 199 tcp_req->r2tl_remain_next = 0; 200 tcp_req->active_r2ts = 0; 201 tcp_req->iovcnt = 0; 202 tcp_req->ordering.raw = 0; 203 memset(tcp_req->send_pdu, 0, sizeof(struct nvme_tcp_pdu)); 204 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 205 
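	/* The request is now owned by the submission path: it stays on
	 * outstanding_reqs until a response (or an abort) completes it and
	 * nvme_tcp_req_put() returns it to free_reqs. */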
TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 206 207 return tcp_req; 208 } 209 210 static void 211 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 212 { 213 assert(tcp_req->state != NVME_TCP_REQ_FREE); 214 tcp_req->state = NVME_TCP_REQ_FREE; 215 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 216 } 217 218 static int 219 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 220 { 221 struct addrinfo *res; 222 struct addrinfo hints; 223 int ret; 224 225 memset(&hints, 0, sizeof(hints)); 226 hints.ai_family = family; 227 hints.ai_socktype = SOCK_STREAM; 228 hints.ai_protocol = 0; 229 230 ret = getaddrinfo(addr, service, &hints, &res); 231 if (ret) { 232 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 233 return ret; 234 } 235 236 if (res->ai_addrlen > sizeof(*sa)) { 237 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 238 ret = -EINVAL; 239 } else { 240 memcpy(sa, res->ai_addr, res->ai_addrlen); 241 } 242 243 freeaddrinfo(res); 244 return ret; 245 } 246 247 static void 248 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 249 { 250 free(tqpair->tcp_reqs); 251 tqpair->tcp_reqs = NULL; 252 253 spdk_free(tqpair->send_pdus); 254 tqpair->send_pdus = NULL; 255 } 256 257 static int 258 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 259 { 260 uint16_t i; 261 struct nvme_tcp_req *tcp_req; 262 263 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 264 if (tqpair->tcp_reqs == NULL) { 265 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 266 goto fail; 267 } 268 269 /* Add additional one member for the send_pdu owned by the tqpair */ 270 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 1) * sizeof(struct nvme_tcp_pdu), 271 0x1000, NULL, 272 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 273 274 if (tqpair->send_pdus == NULL) { 275 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 276 goto fail; 277 } 278 279 TAILQ_INIT(&tqpair->send_queue); 280 TAILQ_INIT(&tqpair->free_reqs); 281 TAILQ_INIT(&tqpair->outstanding_reqs); 282 for (i = 0; i < tqpair->num_entries; i++) { 283 tcp_req = &tqpair->tcp_reqs[i]; 284 tcp_req->cid = i; 285 tcp_req->tqpair = tqpair; 286 tcp_req->send_pdu = &tqpair->send_pdus[i]; 287 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 288 } 289 290 tqpair->send_pdu = &tqpair->send_pdus[i]; 291 292 return 0; 293 fail: 294 nvme_tcp_free_reqs(tqpair); 295 return -ENOMEM; 296 } 297 298 static void 299 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 300 { 301 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 302 struct nvme_tcp_pdu *pdu; 303 304 spdk_sock_close(&tqpair->sock); 305 306 /* clear the send_queue */ 307 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 308 pdu = TAILQ_FIRST(&tqpair->send_queue); 309 /* Remove the pdu from the send_queue to prevent the wrong sending out 310 * in the next round connection 311 */ 312 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 313 } 314 } 315 316 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 317 318 static int 319 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 320 { 321 struct nvme_tcp_qpair *tqpair; 322 323 if (!qpair) { 324 return -1; 325 } 326 327 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 328 nvme_tcp_qpair_abort_reqs(qpair, 1); 329 nvme_qpair_deinit(qpair); 330 tqpair = 
nvme_tcp_qpair(qpair); 331 nvme_tcp_free_reqs(tqpair); 332 free(tqpair); 333 334 return 0; 335 } 336 337 static int 338 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 339 { 340 return 0; 341 } 342 343 static int 344 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 345 { 346 struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr); 347 348 if (ctrlr->adminq) { 349 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 350 } 351 352 nvme_ctrlr_destruct_finish(ctrlr); 353 354 free(tctrlr); 355 356 return 0; 357 } 358 359 static void 360 _pdu_write_done(void *cb_arg, int err) 361 { 362 struct nvme_tcp_pdu *pdu = cb_arg; 363 struct nvme_tcp_qpair *tqpair = pdu->qpair; 364 365 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 366 367 if (err != 0) { 368 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 369 return; 370 } 371 372 assert(pdu->cb_fn != NULL); 373 pdu->cb_fn(pdu->cb_arg); 374 } 375 376 static int 377 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 378 struct nvme_tcp_pdu *pdu, 379 nvme_tcp_qpair_xfer_complete_cb cb_fn, 380 void *cb_arg) 381 { 382 int hlen; 383 uint32_t crc32c; 384 uint32_t mapped_length = 0; 385 386 hlen = pdu->hdr.common.hlen; 387 388 /* Header Digest */ 389 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 390 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 391 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 392 } 393 394 /* Data Digest */ 395 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 396 tqpair->flags.host_ddgst_enable) { 397 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 398 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 399 } 400 401 pdu->cb_fn = cb_fn; 402 pdu->cb_arg = cb_arg; 403 404 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu, 405 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 406 &mapped_length); 407 pdu->qpair = tqpair; 408 pdu->sock_req.cb_fn = _pdu_write_done; 409 pdu->sock_req.cb_arg = pdu; 410 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 411 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 412 413 return 0; 414 } 415 416 /* 417 * Build SGL describing contiguous payload buffer. 418 */ 419 static int 420 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 421 { 422 struct nvme_request *req = tcp_req->req; 423 424 tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset; 425 tcp_req->iov[0].iov_len = req->payload_size; 426 tcp_req->iovcnt = 1; 427 428 SPDK_DEBUGLOG(nvme, "enter\n"); 429 430 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 431 432 return 0; 433 } 434 435 /* 436 * Build SGL describing scattered payload buffer. 
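 * The payload's reset_sgl_fn/next_sge_fn callbacks are walked below until
 * either the whole payload is covered or the iovec limit (the controller's
 * max_sges, capped at NVME_TCP_MAX_SGL_DESCRIPTORS) is reached; any length
 * left over at that point is treated as an error.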
437 */ 438 static int 439 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 440 { 441 int rc; 442 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 443 struct nvme_request *req = tcp_req->req; 444 445 SPDK_DEBUGLOG(nvme, "enter\n"); 446 447 assert(req->payload_size != 0); 448 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 449 assert(req->payload.reset_sgl_fn != NULL); 450 assert(req->payload.next_sge_fn != NULL); 451 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 452 453 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 454 remaining_size = req->payload_size; 455 456 do { 457 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 458 &length); 459 if (rc) { 460 return -1; 461 } 462 463 length = spdk_min(length, remaining_size); 464 tcp_req->iov[iovcnt].iov_len = length; 465 remaining_size -= length; 466 iovcnt++; 467 } while (remaining_size > 0 && iovcnt < max_num_sgl); 468 469 470 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */ 471 if (remaining_size > 0) { 472 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 473 tcp_req, iovcnt, remaining_size); 474 return -1; 475 } 476 477 tcp_req->iovcnt = iovcnt; 478 479 return 0; 480 } 481 482 static int 483 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 484 struct nvme_tcp_req *tcp_req) 485 { 486 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 487 int rc = 0; 488 enum spdk_nvme_data_transfer xfer; 489 uint32_t max_incapsule_data_size; 490 491 tcp_req->req = req; 492 req->cmd.cid = tcp_req->cid; 493 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 494 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 495 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 496 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 497 498 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 499 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 500 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 501 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 502 } else { 503 rc = -1; 504 } 505 506 if (rc) { 507 return rc; 508 } 509 510 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 511 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 512 513 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 514 } else { 515 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 516 } 517 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 518 max_incapsule_data_size = ctrlr->ioccsz_bytes; 519 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 520 max_incapsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 521 } 522 523 if (req->payload_size <= max_incapsule_data_size) { 524 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 525 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 526 req->cmd.dptr.sgl1.address = 0; 527 tcp_req->in_capsule_data = true; 528 } 529 } 530 531 return 0; 532 } 533 534 static inline bool 535 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 536 { 537 struct spdk_nvme_cpl cpl; 538 spdk_nvme_cmd_cb user_cb; 539 void *user_cb_arg; 540 struct spdk_nvme_qpair *qpair; 541 struct nvme_request *req; 542 543 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 544 
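		/* Completion requires both ordering bits: send_ack (the kernel has
		 * released the capsule's send buffer) and data_recv (the target has
		 * returned a response or the final C2H data). Until both are set the
		 * request's buffers may still be in flight, so defer completion. */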
return false; 545 } 546 547 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 548 assert(tcp_req->tqpair != NULL); 549 assert(tcp_req->req != NULL); 550 551 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 552 553 if (!tcp_req->tqpair->qpair.in_completion_context) { 554 tcp_req->tqpair->async_complete++; 555 } 556 557 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 558 memcpy(&cpl, &tcp_req->rsp, sizeof(cpl)); 559 user_cb = tcp_req->req->cb_fn; 560 user_cb_arg = tcp_req->req->cb_arg; 561 qpair = tcp_req->req->qpair; 562 req = tcp_req->req; 563 564 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 565 nvme_tcp_req_put(tcp_req->tqpair, tcp_req); 566 nvme_free_request(tcp_req->req); 567 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 568 569 return true; 570 } 571 572 static void 573 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 574 { 575 struct nvme_tcp_req *tcp_req = cb_arg; 576 577 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 578 tcp_req->tqpair->qpair.id); 579 tcp_req->ordering.bits.send_ack = 1; 580 /* Handle the r2t case */ 581 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 582 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 583 nvme_tcp_send_h2c_data(tcp_req); 584 } else { 585 nvme_tcp_req_complete_safe(tcp_req); 586 } 587 } 588 589 static int 590 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 591 struct nvme_tcp_req *tcp_req) 592 { 593 struct nvme_tcp_pdu *pdu; 594 struct spdk_nvme_tcp_cmd *capsule_cmd; 595 uint32_t plen = 0, alignment; 596 uint8_t pdo; 597 598 SPDK_DEBUGLOG(nvme, "enter\n"); 599 pdu = tcp_req->send_pdu; 600 601 capsule_cmd = &pdu->hdr.capsule_cmd; 602 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 603 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 604 capsule_cmd->ccsqe = tcp_req->req->cmd; 605 606 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 607 608 if (tqpair->flags.host_hdgst_enable) { 609 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 610 tcp_req); 611 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 612 plen += SPDK_NVME_TCP_DIGEST_LEN; 613 } 614 615 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 616 goto end; 617 } 618 619 pdo = plen; 620 pdu->padding_len = 0; 621 if (tqpair->cpda) { 622 alignment = (tqpair->cpda + 1) << 2; 623 if (alignment > plen) { 624 pdu->padding_len = alignment - plen; 625 pdo = alignment; 626 plen = alignment; 627 } 628 } 629 630 capsule_cmd->common.pdo = pdo; 631 plen += tcp_req->req->payload_size; 632 if (tqpair->flags.host_ddgst_enable) { 633 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 634 plen += SPDK_NVME_TCP_DIGEST_LEN; 635 } 636 637 tcp_req->datao = 0; 638 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 639 0, tcp_req->req->payload_size); 640 end: 641 capsule_cmd->common.plen = plen; 642 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 643 644 } 645 646 static int 647 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 648 struct nvme_request *req) 649 { 650 struct nvme_tcp_qpair *tqpair; 651 struct nvme_tcp_req *tcp_req; 652 653 tqpair = nvme_tcp_qpair(qpair); 654 assert(tqpair != NULL); 655 assert(req != NULL); 656 657 tcp_req = nvme_tcp_req_get(tqpair); 658 if (!tcp_req) { 659 /* Inform the upper layer to try again 
later. */ 660 return -EAGAIN; 661 } 662 663 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 664 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 665 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 666 nvme_tcp_req_put(tqpair, tcp_req); 667 return -1; 668 } 669 670 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 671 } 672 673 static int 674 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 675 { 676 return 0; 677 } 678 679 static void 680 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 681 struct spdk_nvme_cpl *rsp) 682 { 683 struct nvme_request *req; 684 685 assert(tcp_req->req != NULL); 686 req = tcp_req->req; 687 688 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 689 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp); 690 nvme_free_request(req); 691 } 692 693 static void 694 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 695 { 696 struct nvme_tcp_req *tcp_req, *tmp; 697 struct spdk_nvme_cpl cpl; 698 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 699 700 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 701 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 702 cpl.status.dnr = dnr; 703 704 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 705 nvme_tcp_req_complete(tcp_req, &cpl); 706 nvme_tcp_req_put(tqpair, tcp_req); 707 } 708 } 709 710 static void 711 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 712 enum nvme_tcp_pdu_recv_state state) 713 { 714 if (tqpair->recv_state == state) { 715 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 716 tqpair, state); 717 return; 718 } 719 720 tqpair->recv_state = state; 721 switch (state) { 722 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 723 case NVME_TCP_PDU_RECV_STATE_ERROR: 724 memset(&tqpair->recv_pdu, 0, sizeof(struct nvme_tcp_pdu)); 725 break; 726 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 727 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 728 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 729 default: 730 break; 731 } 732 } 733 734 static void 735 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 736 { 737 struct nvme_tcp_qpair *tqpair = cb_arg; 738 739 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 740 } 741 742 static void 743 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 744 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 745 { 746 struct nvme_tcp_pdu *rsp_pdu; 747 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 748 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 749 uint8_t copy_len; 750 751 rsp_pdu = tqpair->send_pdu; 752 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 753 h2c_term_req = &rsp_pdu->hdr.term_req; 754 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 755 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 756 757 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 758 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 759 DSET32(&h2c_term_req->fei, error_offset); 760 } 761 762 copy_len = pdu->hdr.common.hlen; 763 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 764 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 765 } 766 767 /* Copy the error info into the buffer */ 768 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 769 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 770 771 /* Contain the header len of the wrong received pdu */ 772 h2c_term_req->common.plen = 
h2c_term_req->common.hlen + copy_len; 773 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 774 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, NULL); 775 776 } 777 778 static void 779 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 780 { 781 struct nvme_tcp_pdu *pdu; 782 uint32_t error_offset = 0; 783 enum spdk_nvme_tcp_term_req_fes fes; 784 uint32_t expected_hlen, hd_len = 0; 785 bool plen_error = false; 786 787 pdu = &tqpair->recv_pdu; 788 789 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 790 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 791 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 792 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 793 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 794 goto err; 795 } 796 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 797 if (pdu->hdr.common.plen != expected_hlen) { 798 plen_error = true; 799 } 800 } else { 801 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 802 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotitated\n"); 803 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 804 goto err; 805 } 806 807 switch (pdu->hdr.common.pdu_type) { 808 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 809 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 810 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 811 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 812 } 813 814 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 815 plen_error = true; 816 } 817 break; 818 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 819 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 820 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 821 plen_error = true; 822 } 823 break; 824 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 825 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 826 if ((pdu->hdr.common.plen <= expected_hlen) || 827 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 828 plen_error = true; 829 } 830 break; 831 case SPDK_NVME_TCP_PDU_TYPE_R2T: 832 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 833 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 834 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 835 } 836 837 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 838 plen_error = true; 839 } 840 break; 841 842 default: 843 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu.hdr.common.pdu_type); 844 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 845 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 846 goto err; 847 } 848 } 849 850 if (pdu->hdr.common.hlen != expected_hlen) { 851 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 852 expected_hlen, pdu->hdr.common.hlen); 853 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 854 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 855 goto err; 856 857 } else if (plen_error) { 858 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 859 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 860 goto err; 861 } else { 862 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 863 nvme_tcp_pdu_calc_psh_len(&tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 864 return; 865 } 866 err: 867 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 868 } 869 870 static struct nvme_tcp_req * 871 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 872 { 873 assert(tqpair != NULL); 874 if ((cid >= 
tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 875 return NULL; 876 } 877 878 return &tqpair->tcp_reqs[cid]; 879 } 880 881 static void 882 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 883 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 884 { 885 struct nvme_tcp_req *tcp_req; 886 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 887 uint8_t flags; 888 889 tcp_req = pdu->req; 890 assert(tcp_req != NULL); 891 892 SPDK_DEBUGLOG(nvme, "enter\n"); 893 c2h_data = &pdu->hdr.c2h_data; 894 tcp_req->datao += pdu->data_len; 895 flags = c2h_data->common.flags; 896 897 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 898 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 899 if (tcp_req->datao == tcp_req->req->payload_size) { 900 tcp_req->rsp.status.p = 0; 901 } else { 902 tcp_req->rsp.status.p = 1; 903 } 904 905 tcp_req->rsp.cid = tcp_req->cid; 906 tcp_req->rsp.sqid = tqpair->qpair.id; 907 tcp_req->ordering.bits.data_recv = 1; 908 909 if (nvme_tcp_req_complete_safe(tcp_req)) { 910 (*reaped)++; 911 } 912 } 913 } 914 915 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 916 "Invalid PDU Header Field", 917 "PDU Sequence Error", 918 "Header Digest Error", 919 "Data Transfer Out of Range", 920 "Data Transfer Limit Exceeded", 921 "Unsupported parameter", 922 }; 923 924 static void 925 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 926 { 927 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 928 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 929 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 930 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 931 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 932 DGET32(c2h_term_req->fei)); 933 } 934 /* we may also need to dump some other info here */ 935 } 936 937 static void 938 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 939 struct nvme_tcp_pdu *pdu) 940 { 941 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 942 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 943 } 944 945 static void 946 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 947 uint32_t *reaped) 948 { 949 int rc = 0; 950 struct nvme_tcp_pdu *pdu; 951 uint32_t crc32c, error_offset = 0; 952 enum spdk_nvme_tcp_term_req_fes fes; 953 954 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 955 pdu = &tqpair->recv_pdu; 956 957 SPDK_DEBUGLOG(nvme, "enter\n"); 958 959 /* check data digest if need */ 960 if (pdu->ddgst_enable) { 961 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 962 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 963 if (rc == 0) { 964 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 965 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 966 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 967 return; 968 } 969 } 970 971 switch (pdu->hdr.common.pdu_type) { 972 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 973 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 974 break; 975 976 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 977 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 978 break; 979 980 default: 981 /* The code should not go to here */ 982 SPDK_ERRLOG("The code should not go to here\n"); 983 break; 984 } 985 } 986 987 static void 988 nvme_tcp_send_icreq_complete(void *cb_arg) 989 { 990 struct nvme_tcp_qpair *tqpair = cb_arg; 991 992 SPDK_DEBUGLOG(nvme, "Complete the icreq 
send for tqpair=%p %u\n", tqpair, tqpair->qpair.id);

	tqpair->flags.icreq_send_ack = true;

	if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) {
		SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id);
		tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
	}
}

static void
nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
		       struct nvme_tcp_pdu *pdu)
{
	struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
	uint32_t error_offset = 0;
	enum spdk_nvme_tcp_term_req_fes fes;
	int recv_buf_size;

	/* Only PFV 0 is defined currently */
	if (ic_resp->pfv != 0) {
		SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
		goto end;
	}

	if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
		SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
			    ic_resp->maxh2cdata);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
		goto end;
	}
	tqpair->maxh2cdata = ic_resp->maxh2cdata;

	if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
		SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
		fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
		error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
		goto end;
	}
	tqpair->cpda = ic_resp->cpda;

	tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
	tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
	SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable);
	SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable);

	/* Now that we know whether digests are enabled, properly size the receive buffer to
	 * handle several incoming 4K read commands, scaled by the
	 * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR parameter. */
	recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);

	if (tqpair->flags.host_hdgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (tqpair->flags.host_ddgst_enable) {
		recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
	}

	if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
		SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
			     tqpair,
			     recv_buf_size);
		/* Not fatal.
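 A smaller receive buffer may cost some performance,
		 * but it does not affect correctness, so connection setup continues.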
*/ 1059 } 1060 1061 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1062 1063 if (!tqpair->flags.icreq_send_ack) { 1064 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1065 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id); 1066 return; 1067 } 1068 1069 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 1070 return; 1071 end: 1072 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1073 return; 1074 } 1075 1076 static void 1077 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1078 uint32_t *reaped) 1079 { 1080 struct nvme_tcp_req *tcp_req; 1081 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1082 uint32_t cid, error_offset = 0; 1083 enum spdk_nvme_tcp_term_req_fes fes; 1084 1085 SPDK_DEBUGLOG(nvme, "enter\n"); 1086 cid = capsule_resp->rccqe.cid; 1087 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1088 1089 if (!tcp_req) { 1090 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1091 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1092 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1093 goto end; 1094 } 1095 1096 assert(tcp_req->req != NULL); 1097 1098 tcp_req->rsp = capsule_resp->rccqe; 1099 tcp_req->ordering.bits.data_recv = 1; 1100 1101 /* Recv the pdu again */ 1102 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1103 1104 if (nvme_tcp_req_complete_safe(tcp_req)) { 1105 (*reaped)++; 1106 } 1107 1108 return; 1109 1110 end: 1111 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1112 return; 1113 } 1114 1115 static void 1116 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1117 struct nvme_tcp_pdu *pdu) 1118 { 1119 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1120 uint32_t error_offset = 0; 1121 enum spdk_nvme_tcp_term_req_fes fes; 1122 1123 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1124 SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1125 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1126 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1127 goto end; 1128 } 1129 1130 /* set the data buffer */ 1131 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1132 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1133 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1134 return; 1135 end: 1136 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1137 return; 1138 } 1139 1140 static void 1141 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1142 { 1143 struct nvme_tcp_req *tcp_req; 1144 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1145 uint32_t error_offset = 0; 1146 enum spdk_nvme_tcp_term_req_fes fes; 1147 1148 SPDK_DEBUGLOG(nvme, "enter\n"); 1149 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1150 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1151 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1152 if (!tcp_req) { 1153 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1154 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1155 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1156 goto end; 1157 1158 } 1159 1160 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): datao=%u, 
payload_size=%u\n", 1161 tcp_req, tqpair, tcp_req->datao, tcp_req->req->payload_size); 1162 1163 if (c2h_data->datal > tcp_req->req->payload_size) { 1164 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1165 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1166 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1167 goto end; 1168 } 1169 1170 if (tcp_req->datao != c2h_data->datao) { 1171 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != datao(%u) in tcp_req\n", 1172 tcp_req, c2h_data->datao, tcp_req->datao); 1173 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1174 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1175 goto end; 1176 } 1177 1178 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1179 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1180 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1181 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1182 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1183 goto end; 1184 1185 } 1186 1187 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1188 c2h_data->datao, c2h_data->datal); 1189 pdu->req = tcp_req; 1190 1191 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1192 return; 1193 1194 end: 1195 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1196 return; 1197 } 1198 1199 static void 1200 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1201 { 1202 struct nvme_tcp_req *tcp_req = cb_arg; 1203 1204 assert(tcp_req != NULL); 1205 1206 tcp_req->ordering.bits.send_ack = 1; 1207 if (tcp_req->r2tl_remain) { 1208 nvme_tcp_send_h2c_data(tcp_req); 1209 } else { 1210 assert(tcp_req->active_r2ts > 0); 1211 tcp_req->active_r2ts--; 1212 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1213 1214 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1215 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1216 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1217 assert(tcp_req->active_r2ts > 0); 1218 tcp_req->ttag = tcp_req->ttag_r2t_next; 1219 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1220 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1221 nvme_tcp_send_h2c_data(tcp_req); 1222 return; 1223 } 1224 1225 /* Need also call this function to free the resource */ 1226 nvme_tcp_req_complete_safe(tcp_req); 1227 } 1228 } 1229 1230 static void 1231 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1232 { 1233 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1234 struct nvme_tcp_pdu *rsp_pdu; 1235 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1236 uint32_t plen, pdo, alignment; 1237 1238 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1239 tcp_req->ordering.bits.send_ack = 0; 1240 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1241 rsp_pdu = tcp_req->send_pdu; 1242 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1243 h2c_data = &rsp_pdu->hdr.h2c_data; 1244 1245 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1246 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1247 h2c_data->cccid = tcp_req->cid; 1248 h2c_data->ttag = tcp_req->ttag; 1249 h2c_data->datao = tcp_req->datao; 1250 1251 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1252 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1253 h2c_data->datao, h2c_data->datal); 1254 tcp_req->r2tl_remain -= h2c_data->datal; 1255 1256 if 
(tqpair->flags.host_hdgst_enable) { 1257 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1258 plen += SPDK_NVME_TCP_DIGEST_LEN; 1259 } 1260 1261 rsp_pdu->padding_len = 0; 1262 pdo = plen; 1263 if (tqpair->cpda) { 1264 alignment = (tqpair->cpda + 1) << 2; 1265 if (alignment > plen) { 1266 rsp_pdu->padding_len = alignment - plen; 1267 pdo = plen = alignment; 1268 } 1269 } 1270 1271 h2c_data->common.pdo = pdo; 1272 plen += h2c_data->datal; 1273 if (tqpair->flags.host_ddgst_enable) { 1274 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1275 plen += SPDK_NVME_TCP_DIGEST_LEN; 1276 } 1277 1278 h2c_data->common.plen = plen; 1279 tcp_req->datao += h2c_data->datal; 1280 if (!tcp_req->r2tl_remain) { 1281 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1282 } 1283 1284 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1285 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1286 1287 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1288 } 1289 1290 static void 1291 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1292 { 1293 struct nvme_tcp_req *tcp_req; 1294 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1295 uint32_t cid, error_offset = 0; 1296 enum spdk_nvme_tcp_term_req_fes fes; 1297 1298 SPDK_DEBUGLOG(nvme, "enter\n"); 1299 cid = r2t->cccid; 1300 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1301 if (!tcp_req) { 1302 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1303 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1304 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1305 goto end; 1306 } 1307 1308 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1309 tqpair); 1310 1311 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) { 1312 assert(tcp_req->active_r2ts == 0); 1313 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1314 } 1315 1316 if (tcp_req->datao != r2t->r2to) { 1317 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1318 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to); 1319 goto end; 1320 1321 } 1322 1323 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) { 1324 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n", 1325 tcp_req, r2t->r2to, r2t->r2tl, tqpair->maxh2cdata); 1326 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1327 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl); 1328 goto end; 1329 } 1330 1331 tcp_req->active_r2ts++; 1332 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) { 1333 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) { 1334 /* We receive a subsequent R2T while we are waiting for H2C transfer to complete */ 1335 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n"); 1336 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1); 1337 tcp_req->ttag_r2t_next = r2t->ttag; 1338 tcp_req->r2tl_remain_next = r2t->r2tl; 1339 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1; 1340 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1341 return; 1342 } else { 1343 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1344 SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! 
Max: %u for tqpair=%p\n", tqpair->maxr2t, 1345 tqpair); 1346 goto end; 1347 } 1348 } 1349 1350 tcp_req->ttag = r2t->ttag; 1351 tcp_req->r2tl_remain = r2t->r2tl; 1352 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1353 1354 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1355 nvme_tcp_send_h2c_data(tcp_req); 1356 } else { 1357 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1358 } 1359 1360 return; 1361 1362 end: 1363 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1364 return; 1365 1366 } 1367 1368 static void 1369 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1370 { 1371 struct nvme_tcp_pdu *pdu; 1372 int rc; 1373 uint32_t crc32c, error_offset = 0; 1374 enum spdk_nvme_tcp_term_req_fes fes; 1375 1376 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1377 pdu = &tqpair->recv_pdu; 1378 1379 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1380 /* check header digest if needed */ 1381 if (pdu->has_hdgst) { 1382 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1383 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1384 if (rc == 0) { 1385 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1386 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1387 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1388 return; 1389 1390 } 1391 } 1392 1393 switch (pdu->hdr.common.pdu_type) { 1394 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1395 nvme_tcp_icresp_handle(tqpair, pdu); 1396 break; 1397 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1398 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1399 break; 1400 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1401 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1402 break; 1403 1404 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1405 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1406 break; 1407 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1408 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1409 break; 1410 1411 default: 1412 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu.hdr.common.pdu_type); 1413 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1414 error_offset = 1; 1415 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1416 break; 1417 } 1418 1419 } 1420 1421 static int 1422 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1423 { 1424 int rc = 0; 1425 struct nvme_tcp_pdu *pdu; 1426 uint32_t data_len; 1427 enum nvme_tcp_pdu_recv_state prev_state; 1428 1429 /* The loop here is to allow for several back-to-back state changes. 
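For example, once the common
	 * header has been read, the same invocation can fall straight through to
	 * reading the PDU-specific header and then the payload, provided enough
	 * bytes are already available on the socket.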
*/ 1430 do { 1431 prev_state = tqpair->recv_state; 1432 switch (tqpair->recv_state) { 1433 /* If in a new state */ 1434 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1435 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1436 break; 1437 /* common header */ 1438 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1439 pdu = &tqpair->recv_pdu; 1440 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1441 rc = nvme_tcp_read_data(tqpair->sock, 1442 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1443 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1444 if (rc < 0) { 1445 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1446 break; 1447 } 1448 pdu->ch_valid_bytes += rc; 1449 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1450 rc = NVME_TCP_PDU_IN_PROGRESS; 1451 goto out; 1452 } 1453 } 1454 1455 /* The command header of this PDU has now been read from the socket. */ 1456 nvme_tcp_pdu_ch_handle(tqpair); 1457 break; 1458 /* Wait for the pdu specific header */ 1459 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1460 pdu = &tqpair->recv_pdu; 1461 rc = nvme_tcp_read_data(tqpair->sock, 1462 pdu->psh_len - pdu->psh_valid_bytes, 1463 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1464 if (rc < 0) { 1465 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1466 break; 1467 } 1468 1469 pdu->psh_valid_bytes += rc; 1470 if (pdu->psh_valid_bytes < pdu->psh_len) { 1471 rc = NVME_TCP_PDU_IN_PROGRESS; 1472 goto out; 1473 } 1474 1475 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1476 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1477 break; 1478 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1479 pdu = &tqpair->recv_pdu; 1480 /* check whether the data is valid, if not we just return */ 1481 if (!pdu->data_len) { 1482 return NVME_TCP_PDU_IN_PROGRESS; 1483 } 1484 1485 data_len = pdu->data_len; 1486 /* data digest */ 1487 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1488 tqpair->flags.host_ddgst_enable)) { 1489 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1490 pdu->ddgst_enable = true; 1491 } 1492 1493 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1494 if (rc < 0) { 1495 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1496 break; 1497 } 1498 1499 pdu->readv_offset += rc; 1500 if (pdu->readv_offset < data_len) { 1501 rc = NVME_TCP_PDU_IN_PROGRESS; 1502 goto out; 1503 } 1504 1505 assert(pdu->readv_offset == data_len); 1506 /* All of this PDU has now been read from the socket. */ 1507 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1508 break; 1509 case NVME_TCP_PDU_RECV_STATE_ERROR: 1510 rc = NVME_TCP_PDU_FATAL; 1511 break; 1512 default: 1513 assert(0); 1514 break; 1515 } 1516 } while (prev_state != tqpair->recv_state); 1517 1518 out: 1519 *reaped += tqpair->async_complete; 1520 tqpair->async_complete = 0; 1521 1522 return rc; 1523 } 1524 1525 static void 1526 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1527 { 1528 uint64_t t02; 1529 struct nvme_tcp_req *tcp_req, *tmp; 1530 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1531 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1532 struct spdk_nvme_ctrlr_process *active_proc; 1533 1534 /* Don't check timeouts during controller initialization. 
*/ 1535 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1536 return; 1537 } 1538 1539 if (nvme_qpair_is_admin_queue(qpair)) { 1540 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1541 } else { 1542 active_proc = qpair->active_proc; 1543 } 1544 1545 /* Only check timeouts if the current process has a timeout callback. */ 1546 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1547 return; 1548 } 1549 1550 t02 = spdk_get_ticks(); 1551 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1552 assert(tcp_req->req != NULL); 1553 1554 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1555 /* 1556 * The requests are in order, so as soon as one has not timed out, 1557 * stop iterating. 1558 */ 1559 break; 1560 } 1561 } 1562 } 1563 1564 static int 1565 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1566 { 1567 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1568 uint32_t reaped; 1569 int rc; 1570 1571 rc = spdk_sock_flush(tqpair->sock); 1572 if (rc < 0) { 1573 return rc; 1574 } 1575 1576 if (max_completions == 0) { 1577 max_completions = tqpair->num_entries; 1578 } else { 1579 max_completions = spdk_min(max_completions, tqpair->num_entries); 1580 } 1581 1582 reaped = 0; 1583 do { 1584 rc = nvme_tcp_read_pdu(tqpair, &reaped); 1585 if (rc < 0) { 1586 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1587 errno, spdk_strerror(errno)); 1588 goto fail; 1589 } else if (rc == 0) { 1590 /* Partial PDU is read */ 1591 break; 1592 } 1593 1594 } while (reaped < max_completions); 1595 1596 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1597 nvme_tcp_qpair_check_timeout(qpair); 1598 } 1599 1600 return reaped; 1601 fail: 1602 1603 /* 1604 * Since admin queues take the ctrlr_lock before entering this function, 1605 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need 1606 * to call the generic function which will take the lock for us. 
1607 */ 1608 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN; 1609 1610 if (nvme_qpair_is_admin_queue(qpair)) { 1611 nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair); 1612 } else { 1613 nvme_ctrlr_disconnect_qpair(qpair); 1614 } 1615 return -ENXIO; 1616 } 1617 1618 static void 1619 nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock) 1620 { 1621 struct spdk_nvme_qpair *qpair = ctx; 1622 struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group); 1623 int32_t num_completions; 1624 1625 num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair); 1626 1627 if (pgroup->num_completions >= 0 && num_completions >= 0) { 1628 pgroup->num_completions += num_completions; 1629 } else { 1630 pgroup->num_completions = -ENXIO; 1631 } 1632 } 1633 1634 static void 1635 dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) 1636 { 1637 } 1638 1639 static int 1640 nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair) 1641 { 1642 struct spdk_nvme_tcp_ic_req *ic_req; 1643 struct nvme_tcp_pdu *pdu; 1644 uint64_t icreq_timeout_tsc; 1645 int rc; 1646 1647 pdu = tqpair->send_pdu; 1648 memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu)); 1649 ic_req = &pdu->hdr.ic_req; 1650 1651 ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ; 1652 ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req); 1653 ic_req->pfv = 0; 1654 ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1; 1655 ic_req->hpda = NVME_TCP_HPDA_DEFAULT; 1656 1657 ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest; 1658 ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest; 1659 1660 nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair); 1661 1662 icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz()); 1663 do { 1664 if (tqpair->qpair.poll_group) { 1665 rc = (int)nvme_tcp_poll_group_process_completions(tqpair->qpair.poll_group, 0, 1666 dummy_disconnected_qpair_cb); 1667 } else { 1668 rc = nvme_tcp_qpair_process_completions(&tqpair->qpair, 0); 1669 } 1670 } while ((tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) && 1671 (rc >= 0) && (spdk_get_ticks() <= icreq_timeout_tsc)); 1672 1673 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 1674 SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair); 1675 return -1; 1676 } 1677 1678 SPDK_DEBUGLOG(nvme, "Succesfully construct the tqpair=%p via correct icresp\n", tqpair); 1679 1680 return 0; 1681 } 1682 1683 static int 1684 nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 1685 { 1686 struct sockaddr_storage dst_addr; 1687 struct sockaddr_storage src_addr; 1688 int rc; 1689 struct nvme_tcp_qpair *tqpair; 1690 int family; 1691 long int port; 1692 struct spdk_sock_opts opts; 1693 1694 tqpair = nvme_tcp_qpair(qpair); 1695 1696 switch (ctrlr->trid.adrfam) { 1697 case SPDK_NVMF_ADRFAM_IPV4: 1698 family = AF_INET; 1699 break; 1700 case SPDK_NVMF_ADRFAM_IPV6: 1701 family = AF_INET6; 1702 break; 1703 default: 1704 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam); 1705 rc = -1; 1706 return rc; 1707 } 1708 1709 SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family); 1710 1711 memset(&dst_addr, 0, sizeof(dst_addr)); 1712 1713 SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid); 1714 rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid); 1715 if (rc != 0) { 
1716 SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n"); 1717 return rc; 1718 } 1719 1720 if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) { 1721 memset(&src_addr, 0, sizeof(src_addr)); 1722 rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid); 1723 if (rc != 0) { 1724 SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n"); 1725 return rc; 1726 } 1727 } 1728 1729 port = spdk_strtol(ctrlr->trid.trsvcid, 10); 1730 if (port <= 0 || port >= INT_MAX) { 1731 SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid); 1732 rc = -1; 1733 return rc; 1734 } 1735 1736 opts.opts_size = sizeof(opts); 1737 spdk_sock_get_default_opts(&opts); 1738 opts.priority = ctrlr->trid.priority; 1739 opts.zcopy = !nvme_qpair_is_admin_queue(qpair) && qpair->poll_group != NULL; 1740 tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts); 1741 if (!tqpair->sock) { 1742 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", 1743 tqpair, ctrlr->trid.traddr, port); 1744 rc = -1; 1745 return rc; 1746 } 1747 1748 if (qpair->poll_group) { 1749 rc = nvme_poll_group_connect_qpair(qpair); 1750 if (rc) { 1751 SPDK_ERRLOG("Unable to activate the tcp qpair.\n"); 1752 return rc; 1753 } 1754 } 1755 1756 tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT; 1757 /* Explicitly set the state and recv_state of tqpair */ 1758 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 1759 if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) { 1760 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1761 } 1762 rc = nvme_tcp_qpair_icreq_send(tqpair); 1763 if (rc != 0) { 1764 SPDK_ERRLOG("Unable to connect the tqpair\n"); 1765 return rc; 1766 } 1767 1768 rc = nvme_fabric_qpair_connect(&tqpair->qpair, tqpair->num_entries); 1769 if (rc < 0) { 1770 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); 1771 return rc; 1772 } 1773 1774 return 0; 1775 } 1776 1777 static struct spdk_nvme_qpair * 1778 nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, 1779 uint16_t qid, uint32_t qsize, 1780 enum spdk_nvme_qprio qprio, 1781 uint32_t num_requests) 1782 { 1783 struct nvme_tcp_qpair *tqpair; 1784 struct spdk_nvme_qpair *qpair; 1785 int rc; 1786 1787 tqpair = calloc(1, sizeof(struct nvme_tcp_qpair)); 1788 if (!tqpair) { 1789 SPDK_ERRLOG("failed to get create tqpair\n"); 1790 return NULL; 1791 } 1792 1793 tqpair->num_entries = qsize; 1794 qpair = &tqpair->qpair; 1795 rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests); 1796 if (rc != 0) { 1797 free(tqpair); 1798 return NULL; 1799 } 1800 1801 rc = nvme_tcp_alloc_reqs(tqpair); 1802 if (rc) { 1803 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair); 1804 return NULL; 1805 } 1806 1807 return qpair; 1808 } 1809 1810 static struct spdk_nvme_qpair * 1811 nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, 1812 const struct spdk_nvme_io_qpair_opts *opts) 1813 { 1814 return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio, 1815 opts->io_queue_requests); 1816 } 1817 1818 static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid, 1819 const struct spdk_nvme_ctrlr_opts *opts, 1820 void *devhandle) 1821 { 1822 struct nvme_tcp_ctrlr *tctrlr; 1823 union spdk_nvme_cap_register cap; 1824 union spdk_nvme_vs_register vs; 1825 int rc; 1826 1827 tctrlr = calloc(1, sizeof(*tctrlr)); 1828 if (tctrlr == NULL) { 1829 SPDK_ERRLOG("could not allocate ctrlr\n"); 1830 return NULL; 1831 } 1832 1833 tctrlr->ctrlr.opts = 
static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
		const struct spdk_nvme_ctrlr_opts *opts,
		void *devhandle)
{
	struct nvme_tcp_ctrlr *tctrlr;
	union spdk_nvme_cap_register cap;
	union spdk_nvme_vs_register vs;
	int rc;

	tctrlr = calloc(1, sizeof(*tctrlr));
	if (tctrlr == NULL) {
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	tctrlr->ctrlr.opts = *opts;
	tctrlr->ctrlr.trid = *trid;

	rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
	if (rc != 0) {
		free(tctrlr);
		return NULL;
	}

	tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
			       tctrlr->ctrlr.opts.admin_queue_size, 0,
			       tctrlr->ctrlr.opts.admin_queue_size);
	if (!tctrlr->ctrlr.adminq) {
		SPDK_ERRLOG("failed to create admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	rc = nvme_transport_ctrlr_connect_qpair(&tctrlr->ctrlr, tctrlr->ctrlr.adminq);
	if (rc < 0) {
		SPDK_ERRLOG("failed to connect admin qpair\n");
		nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_get_cap(&tctrlr->ctrlr, &cap)) {
		SPDK_ERRLOG("get_cap() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_get_vs(&tctrlr->ctrlr, &vs)) {
		SPDK_ERRLOG("get_vs() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
		SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
		nvme_ctrlr_destruct(&tctrlr->ctrlr);
		return NULL;
	}

	nvme_ctrlr_init_cap(&tctrlr->ctrlr, &cap, &vs);

	return &tctrlr->ctrlr;
}

static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/* TCP transport doesn't limit maximum IO transfer size. */
	return UINT32_MAX;
}

static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * We do not support >1 SGE in the initiator currently,
	 * so we can only return 1 here. Once that support is
	 * added, this should return ctrlr->cdata.nvmf_specific.msdbd
	 * instead.
	 */
	return 1;
}

static int
nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
				int (*iter_fn)(struct nvme_request *req, void *arg),
				void *arg)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_req *tcp_req, *tmp;
	int rc;

	assert(iter_fn != NULL);

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);

		rc = iter_fn(tcp_req->req, arg);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_req *tcp_req, *tmp;
	struct spdk_nvme_cpl cpl;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		assert(tcp_req->req != NULL);
		if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			continue;
		}

		nvme_tcp_req_complete(tcp_req, &cpl);
		nvme_tcp_req_put(tqpair, tcp_req);
	}
}

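/*
 * Poll group support: each connected qpair's socket is added to a single
 * spdk_sock_group, and nvme_tcp_qpair_sock_cb() drains completions for a qpair
 * whenever its socket becomes readable. nvme_tcp_poll_group_process_completions()
 * polls the sock group, invokes the disconnected-qpair callback for any
 * disconnected qpairs, and returns the per-qpair completion counts accumulated
 * in group->num_completions.
 */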
static struct spdk_nvme_transport_poll_group *
nvme_tcp_poll_group_create(void)
{
	struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));

	if (group == NULL) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	group->sock_group = spdk_sock_group_create(group);
	if (group->sock_group == NULL) {
		free(group);
		SPDK_ERRLOG("Unable to allocate sock group.\n");
		return NULL;
	}

	return &group->group;
}

static int
nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
		return -EPROTO;
	}
	return 0;
}

static int
nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	if (tqpair->sock && group->sock_group) {
		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
			return -EPROTO;
		}
	}
	return 0;
}

static int
nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	/* disconnected qpairs won't have a sock to add. */
	if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
		if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
			return -EPROTO;
		}
	}

	return 0;
}

static int
nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			   struct spdk_nvme_qpair *qpair)
{
	if (qpair->poll_group_tailq_head == &tgroup->connected_qpairs) {
		return nvme_poll_group_disconnect_qpair(qpair);
	}

	return 0;
}

static int64_t
nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
					uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
	struct spdk_nvme_qpair *qpair, *tmp_qpair;

	group->completions_per_qpair = completions_per_qpair;
	group->num_completions = 0;

	spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	return group->num_completions;
}

static int
nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	int rc;
	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);

	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	rc = spdk_sock_group_close(&group->sock_group);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
		assert(false);
	}

	free(tgroup);

	return 0;
}

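/*
 * The function table below binds this transport into the generic NVMe driver;
 * SPDK_NVME_TRANSPORT_REGISTER() at the end of the file makes it selectable via
 * a transport ID with trtype TCP. As an illustration only (not part of this
 * driver), an application would typically reach this transport roughly as
 * follows; the address, port and subsystem NQN are placeholders:
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid,
 *		"trtype:TCP adrfam:IPv4 traddr:192.168.0.10 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1");
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 */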
const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
	.ctrlr_construct = nvme_tcp_ctrlr_construct,
	.ctrlr_scan = nvme_fabric_ctrlr_scan,
	.ctrlr_destruct = nvme_tcp_ctrlr_destruct,
	.ctrlr_enable = nvme_tcp_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,

	.ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,

	.ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
	.qpair_process_completions = nvme_tcp_qpair_process_completions,
	.qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,

	.poll_group_create = nvme_tcp_poll_group_create,
	.poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
	.poll_group_add = nvme_tcp_poll_group_add,
	.poll_group_remove = nvme_tcp_poll_group_remove,
	.poll_group_process_completions = nvme_tcp_poll_group_process_completions,
	.poll_group_destroy = nvme_tcp_poll_group_destroy,
};

SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);