1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * NVMe/TCP transport 36 */ 37 38 #include "nvme_internal.h" 39 40 #include "spdk/endian.h" 41 #include "spdk/likely.h" 42 #include "spdk/string.h" 43 #include "spdk/stdinc.h" 44 #include "spdk/crc32.h" 45 #include "spdk/endian.h" 46 #include "spdk/assert.h" 47 #include "spdk/string.h" 48 #include "spdk/thread.h" 49 #include "spdk/trace.h" 50 #include "spdk/util.h" 51 52 #include "spdk_internal/nvme_tcp.h" 53 54 #define NVME_TCP_RW_BUFFER_SIZE 131072 55 #define NVME_TCP_TIME_OUT_IN_SECONDS 2 56 57 #define NVME_TCP_HPDA_DEFAULT 0 58 #define NVME_TCP_MAX_R2T_DEFAULT 1 59 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 60 #define NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE 8192 61 62 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ 63 struct nvme_tcp_ctrlr { 64 struct spdk_nvme_ctrlr ctrlr; 65 }; 66 67 struct nvme_tcp_poll_group { 68 struct spdk_nvme_transport_poll_group group; 69 struct spdk_sock_group *sock_group; 70 uint32_t completions_per_qpair; 71 int64_t num_completions; 72 }; 73 74 /* NVMe TCP qpair extensions for spdk_nvme_qpair */ 75 struct nvme_tcp_qpair { 76 struct spdk_nvme_qpair qpair; 77 struct spdk_sock *sock; 78 79 TAILQ_HEAD(, nvme_tcp_req) free_reqs; 80 TAILQ_HEAD(, nvme_tcp_req) outstanding_reqs; 81 82 TAILQ_HEAD(, nvme_tcp_pdu) send_queue; 83 struct nvme_tcp_pdu recv_pdu; 84 struct nvme_tcp_pdu *send_pdu; /* only for error pdu and init pdu */ 85 struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */ 86 enum nvme_tcp_pdu_recv_state recv_state; 87 88 struct nvme_tcp_req *tcp_reqs; 89 90 uint16_t num_entries; 91 uint16_t async_complete; 92 93 struct { 94 uint16_t host_hdgst_enable: 1; 95 uint16_t host_ddgst_enable: 1; 96 uint16_t icreq_send_ack: 1; 97 uint16_t reserved: 13; 98 } flags; 99 100 /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */ 101 uint32_t maxh2cdata; 102 103 uint32_t maxr2t; 104 105 /* 0 based value, which is used to guide the padding */ 106 uint8_t cpda; 107 108 enum nvme_tcp_qpair_state state; 109 }; 110 111 enum nvme_tcp_req_state { 112 NVME_TCP_REQ_FREE, 113 NVME_TCP_REQ_ACTIVE, 114 NVME_TCP_REQ_ACTIVE_R2T, 115 }; 116 117 struct nvme_tcp_req { 118 struct nvme_request *req; 119 enum nvme_tcp_req_state state; 120 uint16_t cid; 121 uint16_t ttag; 122 uint32_t datao; 123 uint32_t r2tl_remain; 124 uint32_t active_r2ts; 125 /* Used to hold a value received from subsequent R2T while we are still 126 * waiting for H2C complete */ 127 uint16_t ttag_r2t_next; 128 bool in_capsule_data; 129 /* It is used to track whether the req can be safely freed */ 130 union { 131 uint8_t raw; 132 struct { 133 /* The last send operation completed - kernel released send buffer */ 134 uint8_t send_ack : 1; 135 /* Data transfer completed - target send resp or last data bit */ 136 uint8_t data_recv : 1; 137 /* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification 138 * from kernel) to send H2C */ 139 uint8_t h2c_send_waiting_ack : 1; 140 /* tcp_req received subsequent r2t while it is still waiting for send_ack. 141 * Rare case, actual when dealing with target that can send several R2T requests. 142 * SPDK TCP target sends 1 R2T for the whole data buffer */ 143 uint8_t r2t_waiting_h2c_complete : 1; 144 uint8_t reserved : 4; 145 } bits; 146 } ordering; 147 struct nvme_tcp_pdu *send_pdu; 148 struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; 149 uint32_t iovcnt; 150 /* Used to hold a value received from subsequent R2T while we are still 151 * waiting for H2C ack */ 152 uint32_t r2tl_remain_next; 153 struct nvme_tcp_qpair *tqpair; 154 TAILQ_ENTRY(nvme_tcp_req) link; 155 struct spdk_nvme_cpl rsp; 156 }; 157 158 static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req); 159 static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group 160 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb); 161 static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu); 162 163 static inline struct nvme_tcp_qpair * 164 nvme_tcp_qpair(struct spdk_nvme_qpair *qpair) 165 { 166 assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP); 167 return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair); 168 } 169 170 static inline struct nvme_tcp_poll_group * 171 nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group) 172 { 173 return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group); 174 } 175 176 static inline struct nvme_tcp_ctrlr * 177 nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 178 { 179 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP); 180 return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr); 181 } 182 183 static struct nvme_tcp_req * 184 nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair) 185 { 186 struct nvme_tcp_req *tcp_req; 187 188 tcp_req = TAILQ_FIRST(&tqpair->free_reqs); 189 if (!tcp_req) { 190 return NULL; 191 } 192 193 assert(tcp_req->state == NVME_TCP_REQ_FREE); 194 tcp_req->state = NVME_TCP_REQ_ACTIVE; 195 TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link); 196 tcp_req->datao = 0; 197 tcp_req->req = NULL; 198 tcp_req->in_capsule_data = false; 199 tcp_req->r2tl_remain = 0; 200 tcp_req->r2tl_remain_next = 0; 201 tcp_req->active_r2ts = 0; 202 tcp_req->iovcnt = 0; 203 tcp_req->ordering.raw = 0; 204 memset(tcp_req->send_pdu, 0, sizeof(struct nvme_tcp_pdu)); 205 memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl)); 206 TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link); 207 208 return tcp_req; 209 } 210 211 static void 212 nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 213 { 214 assert(tcp_req->state != NVME_TCP_REQ_FREE); 215 tcp_req->state = NVME_TCP_REQ_FREE; 216 TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link); 217 } 218 219 static int 220 nvme_tcp_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) 221 { 222 struct addrinfo *res; 223 struct addrinfo hints; 224 int ret; 225 226 memset(&hints, 0, sizeof(hints)); 227 hints.ai_family = family; 228 hints.ai_socktype = SOCK_STREAM; 229 hints.ai_protocol = 0; 230 231 ret = getaddrinfo(addr, service, &hints, &res); 232 if (ret) { 233 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); 234 return ret; 235 } 236 237 if (res->ai_addrlen > sizeof(*sa)) { 238 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); 239 ret = -EINVAL; 240 } else { 241 memcpy(sa, res->ai_addr, res->ai_addrlen); 242 } 243 244 freeaddrinfo(res); 245 return ret; 246 } 247 248 static void 249 nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair) 250 { 251 free(tqpair->tcp_reqs); 252 tqpair->tcp_reqs = NULL; 253 254 spdk_free(tqpair->send_pdus); 255 tqpair->send_pdus = NULL; 256 } 257 258 static int 259 nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair) 260 { 261 uint16_t i; 262 struct nvme_tcp_req *tcp_req; 263 264 tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req)); 265 if (tqpair->tcp_reqs == NULL) { 266 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair); 267 goto fail; 268 } 269 270 /* Add additional one member for the send_pdu owned by the tqpair */ 271 tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 1) * sizeof(struct nvme_tcp_pdu), 272 0x1000, NULL, 273 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 274 275 if (tqpair->send_pdus == NULL) { 276 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair); 277 goto fail; 278 } 279 280 TAILQ_INIT(&tqpair->send_queue); 281 TAILQ_INIT(&tqpair->free_reqs); 282 TAILQ_INIT(&tqpair->outstanding_reqs); 283 for (i = 0; i < tqpair->num_entries; i++) { 284 tcp_req = &tqpair->tcp_reqs[i]; 285 tcp_req->cid = i; 286 tcp_req->tqpair = tqpair; 287 tcp_req->send_pdu = &tqpair->send_pdus[i]; 288 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link); 289 } 290 291 tqpair->send_pdu = &tqpair->send_pdus[i]; 292 293 return 0; 294 fail: 295 nvme_tcp_free_reqs(tqpair); 296 return -ENOMEM; 297 } 298 299 static void 300 nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 301 { 302 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 303 struct nvme_tcp_pdu *pdu; 304 305 spdk_sock_close(&tqpair->sock); 306 307 /* clear the send_queue */ 308 while (!TAILQ_EMPTY(&tqpair->send_queue)) { 309 pdu = TAILQ_FIRST(&tqpair->send_queue); 310 /* Remove the pdu from the send_queue to prevent the wrong sending out 311 * in the next round connection 312 */ 313 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 314 } 315 } 316 317 static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); 318 319 static int 320 nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 321 { 322 struct nvme_tcp_qpair *tqpair; 323 324 if (!qpair) { 325 return -1; 326 } 327 328 nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); 329 nvme_tcp_qpair_abort_reqs(qpair, 1); 330 nvme_qpair_deinit(qpair); 331 tqpair = nvme_tcp_qpair(qpair); 332 nvme_tcp_free_reqs(tqpair); 333 free(tqpair); 334 335 return 0; 336 } 337 338 static int 339 nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 340 { 341 return 0; 342 } 343 344 static int 345 nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 346 { 347 struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr); 348 349 if (ctrlr->adminq) { 350 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 351 } 352 353 nvme_ctrlr_destruct_finish(ctrlr); 354 355 free(tctrlr); 356 357 return 0; 358 } 359 360 static void 361 _pdu_write_done(void *cb_arg, int err) 362 { 363 struct nvme_tcp_pdu *pdu = cb_arg; 364 struct nvme_tcp_qpair *tqpair = pdu->qpair; 365 366 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq); 367 368 if (err != 0) { 369 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair); 370 return; 371 } 372 373 assert(pdu->cb_fn != NULL); 374 pdu->cb_fn(pdu->cb_arg); 375 } 376 377 static int 378 nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair, 379 struct nvme_tcp_pdu *pdu, 380 nvme_tcp_qpair_xfer_complete_cb cb_fn, 381 void *cb_arg) 382 { 383 int hlen; 384 uint32_t crc32c; 385 uint32_t mapped_length = 0; 386 387 hlen = pdu->hdr.common.hlen; 388 389 /* Header Digest */ 390 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) { 391 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 392 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c); 393 } 394 395 /* Data Digest */ 396 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && 397 tqpair->flags.host_ddgst_enable) { 398 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 399 MAKE_DIGEST_WORD(pdu->data_digest, crc32c); 400 } 401 402 pdu->cb_fn = cb_fn; 403 pdu->cb_arg = cb_arg; 404 405 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu, 406 (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable, 407 &mapped_length); 408 pdu->qpair = tqpair; 409 pdu->sock_req.cb_fn = _pdu_write_done; 410 pdu->sock_req.cb_arg = pdu; 411 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq); 412 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req); 413 414 return 0; 415 } 416 417 /* 418 * Build SGL describing contiguous payload buffer. 419 */ 420 static int 421 nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 422 { 423 struct nvme_request *req = tcp_req->req; 424 425 tcp_req->iov[0].iov_base = req->payload.contig_or_cb_arg + req->payload_offset; 426 tcp_req->iov[0].iov_len = req->payload_size; 427 tcp_req->iovcnt = 1; 428 429 SPDK_DEBUGLOG(nvme, "enter\n"); 430 431 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 432 433 return 0; 434 } 435 436 /* 437 * Build SGL describing scattered payload buffer. 438 */ 439 static int 440 nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req) 441 { 442 int rc; 443 uint32_t length, remaining_size, iovcnt = 0, max_num_sgl; 444 struct nvme_request *req = tcp_req->req; 445 446 SPDK_DEBUGLOG(nvme, "enter\n"); 447 448 assert(req->payload_size != 0); 449 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 450 assert(req->payload.reset_sgl_fn != NULL); 451 assert(req->payload.next_sge_fn != NULL); 452 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 453 454 max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS); 455 remaining_size = req->payload_size; 456 457 do { 458 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base, 459 &length); 460 if (rc) { 461 return -1; 462 } 463 464 length = spdk_min(length, remaining_size); 465 tcp_req->iov[iovcnt].iov_len = length; 466 remaining_size -= length; 467 iovcnt++; 468 } while (remaining_size > 0 && iovcnt < max_num_sgl); 469 470 471 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */ 472 if (remaining_size > 0) { 473 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n", 474 tcp_req, iovcnt, remaining_size); 475 return -1; 476 } 477 478 tcp_req->iovcnt = iovcnt; 479 480 return 0; 481 } 482 483 static int 484 nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, 485 struct nvme_tcp_req *tcp_req) 486 { 487 struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr; 488 int rc = 0; 489 enum spdk_nvme_data_transfer xfer; 490 uint32_t max_incapsule_data_size; 491 492 tcp_req->req = req; 493 req->cmd.cid = tcp_req->cid; 494 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 495 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK; 496 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT; 497 req->cmd.dptr.sgl1.unkeyed.length = req->payload_size; 498 499 if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { 500 rc = nvme_tcp_build_contig_request(tqpair, tcp_req); 501 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { 502 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req); 503 } else { 504 rc = -1; 505 } 506 507 if (rc) { 508 return rc; 509 } 510 511 if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { 512 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; 513 514 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); 515 } else { 516 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); 517 } 518 if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 519 max_incapsule_data_size = ctrlr->ioccsz_bytes; 520 if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { 521 max_incapsule_data_size = NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; 522 } 523 524 if (req->payload_size <= max_incapsule_data_size) { 525 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 526 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; 527 req->cmd.dptr.sgl1.address = 0; 528 tcp_req->in_capsule_data = true; 529 } 530 } 531 532 return 0; 533 } 534 535 static inline bool 536 nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req) 537 { 538 struct spdk_nvme_cpl cpl; 539 spdk_nvme_cmd_cb user_cb; 540 void *user_cb_arg; 541 struct spdk_nvme_qpair *qpair; 542 struct nvme_request *req; 543 544 if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) { 545 return false; 546 } 547 548 assert(tcp_req->state == NVME_TCP_REQ_ACTIVE); 549 assert(tcp_req->tqpair != NULL); 550 assert(tcp_req->req != NULL); 551 552 SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair); 553 554 if (!tcp_req->tqpair->qpair.in_completion_context) { 555 tcp_req->tqpair->async_complete++; 556 } 557 558 /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */ 559 memcpy(&cpl, &tcp_req->rsp, sizeof(cpl)); 560 user_cb = tcp_req->req->cb_fn; 561 user_cb_arg = tcp_req->req->cb_arg; 562 qpair = tcp_req->req->qpair; 563 req = tcp_req->req; 564 565 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 566 nvme_tcp_req_put(tcp_req->tqpair, tcp_req); 567 nvme_free_request(tcp_req->req); 568 nvme_complete_request(user_cb, user_cb_arg, qpair, req, &cpl); 569 570 return true; 571 } 572 573 static void 574 nvme_tcp_qpair_cmd_send_complete(void *cb_arg) 575 { 576 struct nvme_tcp_req *tcp_req = cb_arg; 577 578 SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid, 579 tcp_req->tqpair->qpair.id); 580 tcp_req->ordering.bits.send_ack = 1; 581 /* Handle the r2t case */ 582 if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) { 583 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req); 584 nvme_tcp_send_h2c_data(tcp_req); 585 } else { 586 nvme_tcp_req_complete_safe(tcp_req); 587 } 588 } 589 590 static int 591 nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair, 592 struct nvme_tcp_req *tcp_req) 593 { 594 struct nvme_tcp_pdu *pdu; 595 struct spdk_nvme_tcp_cmd *capsule_cmd; 596 uint32_t plen = 0, alignment; 597 uint8_t pdo; 598 599 SPDK_DEBUGLOG(nvme, "enter\n"); 600 pdu = tcp_req->send_pdu; 601 602 capsule_cmd = &pdu->hdr.capsule_cmd; 603 capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD; 604 plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd); 605 capsule_cmd->ccsqe = tcp_req->req->cmd; 606 607 SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair); 608 609 if (tqpair->flags.host_hdgst_enable) { 610 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n", 611 tcp_req); 612 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 613 plen += SPDK_NVME_TCP_DIGEST_LEN; 614 } 615 616 if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) { 617 goto end; 618 } 619 620 pdo = plen; 621 pdu->padding_len = 0; 622 if (tqpair->cpda) { 623 alignment = (tqpair->cpda + 1) << 2; 624 if (alignment > plen) { 625 pdu->padding_len = alignment - plen; 626 pdo = alignment; 627 plen = alignment; 628 } 629 } 630 631 capsule_cmd->common.pdo = pdo; 632 plen += tcp_req->req->payload_size; 633 if (tqpair->flags.host_ddgst_enable) { 634 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 635 plen += SPDK_NVME_TCP_DIGEST_LEN; 636 } 637 638 tcp_req->datao = 0; 639 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 640 0, tcp_req->req->payload_size); 641 end: 642 capsule_cmd->common.plen = plen; 643 return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req); 644 645 } 646 647 static int 648 nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair, 649 struct nvme_request *req) 650 { 651 struct nvme_tcp_qpair *tqpair; 652 struct nvme_tcp_req *tcp_req; 653 654 tqpair = nvme_tcp_qpair(qpair); 655 assert(tqpair != NULL); 656 assert(req != NULL); 657 658 tcp_req = nvme_tcp_req_get(tqpair); 659 if (!tcp_req) { 660 /* Inform the upper layer to try again later. */ 661 return -EAGAIN; 662 } 663 664 if (nvme_tcp_req_init(tqpair, req, tcp_req)) { 665 SPDK_ERRLOG("nvme_tcp_req_init() failed\n"); 666 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 667 nvme_tcp_req_put(tqpair, tcp_req); 668 return -1; 669 } 670 671 return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req); 672 } 673 674 static int 675 nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair) 676 { 677 return 0; 678 } 679 680 static void 681 nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, 682 struct spdk_nvme_cpl *rsp) 683 { 684 struct nvme_request *req; 685 686 assert(tcp_req->req != NULL); 687 req = tcp_req->req; 688 689 TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link); 690 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp); 691 nvme_free_request(req); 692 } 693 694 static void 695 nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 696 { 697 struct nvme_tcp_req *tcp_req, *tmp; 698 struct spdk_nvme_cpl cpl; 699 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 700 701 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 702 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 703 cpl.status.dnr = dnr; 704 705 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 706 nvme_tcp_req_complete(tcp_req, &cpl); 707 nvme_tcp_req_put(tqpair, tcp_req); 708 } 709 } 710 711 static void 712 nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair, 713 enum nvme_tcp_pdu_recv_state state) 714 { 715 if (tqpair->recv_state == state) { 716 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n", 717 tqpair, state); 718 return; 719 } 720 721 tqpair->recv_state = state; 722 switch (state) { 723 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 724 case NVME_TCP_PDU_RECV_STATE_ERROR: 725 memset(&tqpair->recv_pdu, 0, sizeof(struct nvme_tcp_pdu)); 726 break; 727 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 728 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 729 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 730 default: 731 break; 732 } 733 } 734 735 static void 736 nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg) 737 { 738 struct nvme_tcp_qpair *tqpair = cb_arg; 739 740 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING; 741 } 742 743 static void 744 nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 745 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset) 746 { 747 struct nvme_tcp_pdu *rsp_pdu; 748 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req; 749 uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req); 750 uint8_t copy_len; 751 752 rsp_pdu = tqpair->send_pdu; 753 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 754 h2c_term_req = &rsp_pdu->hdr.term_req; 755 h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 756 h2c_term_req->common.hlen = h2c_term_req_hdr_len; 757 758 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 759 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 760 DSET32(&h2c_term_req->fei, error_offset); 761 } 762 763 copy_len = pdu->hdr.common.hlen; 764 if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) { 765 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 766 } 767 768 /* Copy the error info into the buffer */ 769 memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len); 770 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len); 771 772 /* Contain the header len of the wrong received pdu */ 773 h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len; 774 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 775 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, NULL); 776 777 } 778 779 static void 780 nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair) 781 { 782 struct nvme_tcp_pdu *pdu; 783 uint32_t error_offset = 0; 784 enum spdk_nvme_tcp_term_req_fes fes; 785 uint32_t expected_hlen, hd_len = 0; 786 bool plen_error = false; 787 788 pdu = &tqpair->recv_pdu; 789 790 SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type); 791 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) { 792 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) { 793 SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu); 794 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 795 goto err; 796 } 797 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp); 798 if (pdu->hdr.common.plen != expected_hlen) { 799 plen_error = true; 800 } 801 } else { 802 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 803 SPDK_ERRLOG("The TCP/IP tqpair connection is not negotitated\n"); 804 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR; 805 goto err; 806 } 807 808 switch (pdu->hdr.common.pdu_type) { 809 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 810 expected_hlen = sizeof(struct spdk_nvme_tcp_rsp); 811 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 812 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 813 } 814 815 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 816 plen_error = true; 817 } 818 break; 819 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 820 expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 821 if (pdu->hdr.common.plen < pdu->hdr.common.pdo) { 822 plen_error = true; 823 } 824 break; 825 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 826 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr); 827 if ((pdu->hdr.common.plen <= expected_hlen) || 828 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) { 829 plen_error = true; 830 } 831 break; 832 case SPDK_NVME_TCP_PDU_TYPE_R2T: 833 expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr); 834 if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) { 835 hd_len = SPDK_NVME_TCP_DIGEST_LEN; 836 } 837 838 if (pdu->hdr.common.plen != (expected_hlen + hd_len)) { 839 plen_error = true; 840 } 841 break; 842 843 default: 844 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu.hdr.common.pdu_type); 845 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 846 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type); 847 goto err; 848 } 849 } 850 851 if (pdu->hdr.common.hlen != expected_hlen) { 852 SPDK_ERRLOG("Expected PDU header length %u, got %u\n", 853 expected_hlen, pdu->hdr.common.hlen); 854 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 855 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen); 856 goto err; 857 858 } else if (plen_error) { 859 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 860 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen); 861 goto err; 862 } else { 863 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 864 nvme_tcp_pdu_calc_psh_len(&tqpair->recv_pdu, tqpair->flags.host_hdgst_enable); 865 return; 866 } 867 err: 868 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 869 } 870 871 static struct nvme_tcp_req * 872 get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid) 873 { 874 assert(tqpair != NULL); 875 if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) { 876 return NULL; 877 } 878 879 return &tqpair->tcp_reqs[cid]; 880 } 881 882 static void 883 nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair, 884 struct nvme_tcp_pdu *pdu, uint32_t *reaped) 885 { 886 struct nvme_tcp_req *tcp_req; 887 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data; 888 uint8_t flags; 889 890 tcp_req = pdu->req; 891 assert(tcp_req != NULL); 892 893 SPDK_DEBUGLOG(nvme, "enter\n"); 894 c2h_data = &pdu->hdr.c2h_data; 895 tcp_req->datao += pdu->data_len; 896 flags = c2h_data->common.flags; 897 898 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 899 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) { 900 if (tcp_req->datao == tcp_req->req->payload_size) { 901 tcp_req->rsp.status.p = 0; 902 } else { 903 tcp_req->rsp.status.p = 1; 904 } 905 906 tcp_req->rsp.cid = tcp_req->cid; 907 tcp_req->rsp.sqid = tqpair->qpair.id; 908 tcp_req->ordering.bits.data_recv = 1; 909 910 if (nvme_tcp_req_complete_safe(tcp_req)) { 911 (*reaped)++; 912 } 913 } 914 } 915 916 static const char *spdk_nvme_tcp_term_req_fes_str[] = { 917 "Invalid PDU Header Field", 918 "PDU Sequence Error", 919 "Header Digest Error", 920 "Data Transfer Out of Range", 921 "Data Transfer Limit Exceeded", 922 "Unsupported parameter", 923 }; 924 925 static void 926 nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req) 927 { 928 SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req, 929 spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]); 930 if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) || 931 (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) { 932 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n", 933 DGET32(c2h_term_req->fei)); 934 } 935 /* we may also need to dump some other info here */ 936 } 937 938 static void 939 nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair, 940 struct nvme_tcp_pdu *pdu) 941 { 942 nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req); 943 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 944 } 945 946 static void 947 nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, 948 uint32_t *reaped) 949 { 950 int rc = 0; 951 struct nvme_tcp_pdu *pdu; 952 uint32_t crc32c, error_offset = 0; 953 enum spdk_nvme_tcp_term_req_fes fes; 954 955 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 956 pdu = &tqpair->recv_pdu; 957 958 SPDK_DEBUGLOG(nvme, "enter\n"); 959 960 /* check data digest if need */ 961 if (pdu->ddgst_enable) { 962 crc32c = nvme_tcp_pdu_calc_data_digest(pdu); 963 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); 964 if (rc == 0) { 965 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 966 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 967 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 968 return; 969 } 970 } 971 972 switch (pdu->hdr.common.pdu_type) { 973 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 974 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped); 975 break; 976 977 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 978 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu); 979 break; 980 981 default: 982 /* The code should not go to here */ 983 SPDK_ERRLOG("The code should not go to here\n"); 984 break; 985 } 986 } 987 988 static void 989 nvme_tcp_send_icreq_complete(void *cb_arg) 990 { 991 struct nvme_tcp_qpair *tqpair = cb_arg; 992 993 SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p\n", tqpair); 994 995 tqpair->flags.icreq_send_ack = true; 996 997 if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) { 998 SPDK_DEBUGLOG(nvme, "qpair %u, finilize icresp\n", tqpair->qpair.id); 999 nvme_tcp_icresp_handle(tqpair, &tqpair->recv_pdu); 1000 } 1001 } 1002 1003 static void 1004 nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, 1005 struct nvme_tcp_pdu *pdu) 1006 { 1007 struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp; 1008 uint32_t error_offset = 0; 1009 enum spdk_nvme_tcp_term_req_fes fes; 1010 int recv_buf_size; 1011 1012 if (!tqpair->flags.icreq_send_ack) { 1013 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING; 1014 SPDK_DEBUGLOG(nvme, "qpair %u, waiting icreq ack\n", tqpair->qpair.id); 1015 return; 1016 } 1017 1018 /* Only PFV 0 is defined currently */ 1019 if (ic_resp->pfv != 0) { 1020 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv); 1021 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1022 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv); 1023 goto end; 1024 } 1025 1026 if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) { 1027 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE, 1028 ic_resp->maxh2cdata); 1029 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1030 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata); 1031 goto end; 1032 } 1033 tqpair->maxh2cdata = ic_resp->maxh2cdata; 1034 1035 if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) { 1036 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda); 1037 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1038 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda); 1039 goto end; 1040 } 1041 tqpair->cpda = ic_resp->cpda; 1042 1043 tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false; 1044 tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false; 1045 SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable); 1046 SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable); 1047 1048 /* Now that we know whether digests are enabled, properly size the receive buffer to 1049 * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 1050 * parameter. */ 1051 recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr); 1052 1053 if (tqpair->flags.host_hdgst_enable) { 1054 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1055 } 1056 1057 if (tqpair->flags.host_ddgst_enable) { 1058 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN; 1059 } 1060 1061 if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) { 1062 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n", 1063 tqpair, 1064 recv_buf_size); 1065 /* Not fatal. */ 1066 } 1067 1068 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING; 1069 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1070 return; 1071 end: 1072 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1073 return; 1074 } 1075 1076 static void 1077 nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu, 1078 uint32_t *reaped) 1079 { 1080 struct nvme_tcp_req *tcp_req; 1081 struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp; 1082 uint32_t cid, error_offset = 0; 1083 enum spdk_nvme_tcp_term_req_fes fes; 1084 1085 SPDK_DEBUGLOG(nvme, "enter\n"); 1086 cid = capsule_resp->rccqe.cid; 1087 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1088 1089 if (!tcp_req) { 1090 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair); 1091 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1092 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe); 1093 goto end; 1094 } 1095 1096 assert(tcp_req->req != NULL); 1097 1098 tcp_req->rsp = capsule_resp->rccqe; 1099 tcp_req->ordering.bits.data_recv = 1; 1100 1101 /* Recv the pdu again */ 1102 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1103 1104 if (nvme_tcp_req_complete_safe(tcp_req)) { 1105 (*reaped)++; 1106 } 1107 1108 return; 1109 1110 end: 1111 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1112 return; 1113 } 1114 1115 static void 1116 nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair, 1117 struct nvme_tcp_pdu *pdu) 1118 { 1119 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req; 1120 uint32_t error_offset = 0; 1121 enum spdk_nvme_tcp_term_req_fes fes; 1122 1123 if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) { 1124 SPDK_ERRLOG("Fatal Error Stauts(FES) is unknown for c2h_term_req pdu=%p\n", pdu); 1125 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1126 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes); 1127 goto end; 1128 } 1129 1130 /* set the data buffer */ 1131 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen, 1132 c2h_term_req->common.plen - c2h_term_req->common.hlen); 1133 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1134 return; 1135 end: 1136 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1137 return; 1138 } 1139 1140 static void 1141 nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1142 { 1143 struct nvme_tcp_req *tcp_req; 1144 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data; 1145 uint32_t error_offset = 0; 1146 enum spdk_nvme_tcp_term_req_fes fes; 1147 1148 SPDK_DEBUGLOG(nvme, "enter\n"); 1149 SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n", 1150 tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid); 1151 tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid); 1152 if (!tcp_req) { 1153 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid); 1154 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1155 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid); 1156 goto end; 1157 1158 } 1159 1160 SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): datao=%u, payload_size=%u\n", 1161 tcp_req, tqpair, tcp_req->datao, tcp_req->req->payload_size); 1162 1163 if (c2h_data->datal > tcp_req->req->payload_size) { 1164 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n", 1165 tcp_req, c2h_data->datal, tcp_req->req->payload_size); 1166 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1167 goto end; 1168 } 1169 1170 if (tcp_req->datao != c2h_data->datao) { 1171 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != datao(%u) in tcp_req\n", 1172 tcp_req, c2h_data->datao, tcp_req->datao); 1173 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1174 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao); 1175 goto end; 1176 } 1177 1178 if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) { 1179 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n", 1180 tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size); 1181 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1182 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal); 1183 goto end; 1184 1185 } 1186 1187 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt, 1188 c2h_data->datao, c2h_data->datal); 1189 pdu->req = tcp_req; 1190 1191 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD); 1192 return; 1193 1194 end: 1195 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1196 return; 1197 } 1198 1199 static void 1200 nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg) 1201 { 1202 struct nvme_tcp_req *tcp_req = cb_arg; 1203 1204 assert(tcp_req != NULL); 1205 1206 tcp_req->ordering.bits.send_ack = 1; 1207 if (tcp_req->r2tl_remain) { 1208 nvme_tcp_send_h2c_data(tcp_req); 1209 } else { 1210 assert(tcp_req->active_r2ts > 0); 1211 tcp_req->active_r2ts--; 1212 tcp_req->state = NVME_TCP_REQ_ACTIVE; 1213 1214 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) { 1215 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0; 1216 SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req); 1217 assert(tcp_req->active_r2ts > 0); 1218 tcp_req->ttag = tcp_req->ttag_r2t_next; 1219 tcp_req->r2tl_remain = tcp_req->r2tl_remain_next; 1220 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1221 nvme_tcp_send_h2c_data(tcp_req); 1222 return; 1223 } 1224 1225 /* Need also call this function to free the resource */ 1226 nvme_tcp_req_complete_safe(tcp_req); 1227 } 1228 } 1229 1230 static void 1231 nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req) 1232 { 1233 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair); 1234 struct nvme_tcp_pdu *rsp_pdu; 1235 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data; 1236 uint32_t plen, pdo, alignment; 1237 1238 /* Reinit the send_ack and h2c_send_waiting_ack bits */ 1239 tcp_req->ordering.bits.send_ack = 0; 1240 tcp_req->ordering.bits.h2c_send_waiting_ack = 0; 1241 rsp_pdu = tcp_req->send_pdu; 1242 memset(rsp_pdu, 0, sizeof(*rsp_pdu)); 1243 h2c_data = &rsp_pdu->hdr.h2c_data; 1244 1245 h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA; 1246 plen = h2c_data->common.hlen = sizeof(*h2c_data); 1247 h2c_data->cccid = tcp_req->cid; 1248 h2c_data->ttag = tcp_req->ttag; 1249 h2c_data->datao = tcp_req->datao; 1250 1251 h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata); 1252 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt, 1253 h2c_data->datao, h2c_data->datal); 1254 tcp_req->r2tl_remain -= h2c_data->datal; 1255 1256 if (tqpair->flags.host_hdgst_enable) { 1257 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF; 1258 plen += SPDK_NVME_TCP_DIGEST_LEN; 1259 } 1260 1261 rsp_pdu->padding_len = 0; 1262 pdo = plen; 1263 if (tqpair->cpda) { 1264 alignment = (tqpair->cpda + 1) << 2; 1265 if (alignment > plen) { 1266 rsp_pdu->padding_len = alignment - plen; 1267 pdo = plen = alignment; 1268 } 1269 } 1270 1271 h2c_data->common.pdo = pdo; 1272 plen += h2c_data->datal; 1273 if (tqpair->flags.host_ddgst_enable) { 1274 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF; 1275 plen += SPDK_NVME_TCP_DIGEST_LEN; 1276 } 1277 1278 h2c_data->common.plen = plen; 1279 tcp_req->datao += h2c_data->datal; 1280 if (!tcp_req->r2tl_remain) { 1281 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 1282 } 1283 1284 SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n", 1285 h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair); 1286 1287 nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req); 1288 } 1289 1290 static void 1291 nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu) 1292 { 1293 struct nvme_tcp_req *tcp_req; 1294 struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t; 1295 uint32_t cid, error_offset = 0; 1296 enum spdk_nvme_tcp_term_req_fes fes; 1297 1298 SPDK_DEBUGLOG(nvme, "enter\n"); 1299 cid = r2t->cccid; 1300 tcp_req = get_nvme_active_req_by_cid(tqpair, cid); 1301 if (!tcp_req) { 1302 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair); 1303 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1304 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid); 1305 goto end; 1306 } 1307 1308 SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl, 1309 tqpair); 1310 1311 if (tcp_req->state == NVME_TCP_REQ_ACTIVE) { 1312 assert(tcp_req->active_r2ts == 0); 1313 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T; 1314 } 1315 1316 if (tcp_req->datao != r2t->r2to) { 1317 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1318 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to); 1319 goto end; 1320 1321 } 1322 1323 if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) { 1324 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n", 1325 tcp_req, r2t->r2to, r2t->r2tl, tqpair->maxh2cdata); 1326 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE; 1327 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl); 1328 goto end; 1329 } 1330 1331 tcp_req->active_r2ts++; 1332 if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) { 1333 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) { 1334 /* We receive a subsequent R2T while we are waiting for H2C transfer to complete */ 1335 SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n"); 1336 assert(tcp_req->active_r2ts == tqpair->maxr2t + 1); 1337 tcp_req->ttag_r2t_next = r2t->ttag; 1338 tcp_req->r2tl_remain_next = r2t->r2tl; 1339 tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1; 1340 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1341 return; 1342 } else { 1343 fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED; 1344 SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t, 1345 tqpair); 1346 goto end; 1347 } 1348 } 1349 1350 tcp_req->ttag = r2t->ttag; 1351 tcp_req->r2tl_remain = r2t->r2tl; 1352 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1353 1354 if (spdk_likely(tcp_req->ordering.bits.send_ack)) { 1355 nvme_tcp_send_h2c_data(tcp_req); 1356 } else { 1357 tcp_req->ordering.bits.h2c_send_waiting_ack = 1; 1358 } 1359 1360 return; 1361 1362 end: 1363 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1364 return; 1365 1366 } 1367 1368 static void 1369 nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1370 { 1371 struct nvme_tcp_pdu *pdu; 1372 int rc; 1373 uint32_t crc32c, error_offset = 0; 1374 enum spdk_nvme_tcp_term_req_fes fes; 1375 1376 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH); 1377 pdu = &tqpair->recv_pdu; 1378 1379 SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type); 1380 /* check header digest if needed */ 1381 if (pdu->has_hdgst) { 1382 crc32c = nvme_tcp_pdu_calc_header_digest(pdu); 1383 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c); 1384 if (rc == 0) { 1385 SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu); 1386 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR; 1387 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1388 return; 1389 1390 } 1391 } 1392 1393 switch (pdu->hdr.common.pdu_type) { 1394 case SPDK_NVME_TCP_PDU_TYPE_IC_RESP: 1395 nvme_tcp_icresp_handle(tqpair, pdu); 1396 break; 1397 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP: 1398 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped); 1399 break; 1400 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA: 1401 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu); 1402 break; 1403 1404 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 1405 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu); 1406 break; 1407 case SPDK_NVME_TCP_PDU_TYPE_R2T: 1408 nvme_tcp_r2t_hdr_handle(tqpair, pdu); 1409 break; 1410 1411 default: 1412 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu.hdr.common.pdu_type); 1413 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD; 1414 error_offset = 1; 1415 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset); 1416 break; 1417 } 1418 1419 } 1420 1421 static int 1422 nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped) 1423 { 1424 int rc = 0; 1425 struct nvme_tcp_pdu *pdu; 1426 uint32_t data_len; 1427 enum nvme_tcp_pdu_recv_state prev_state; 1428 1429 /* The loop here is to allow for several back-to-back state changes. */ 1430 do { 1431 prev_state = tqpair->recv_state; 1432 switch (tqpair->recv_state) { 1433 /* If in a new state */ 1434 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY: 1435 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH); 1436 break; 1437 /* common header */ 1438 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH: 1439 pdu = &tqpair->recv_pdu; 1440 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1441 rc = nvme_tcp_read_data(tqpair->sock, 1442 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes, 1443 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes); 1444 if (rc < 0) { 1445 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1446 break; 1447 } 1448 pdu->ch_valid_bytes += rc; 1449 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) { 1450 rc = NVME_TCP_PDU_IN_PROGRESS; 1451 goto out; 1452 } 1453 } 1454 1455 /* The command header of this PDU has now been read from the socket. */ 1456 nvme_tcp_pdu_ch_handle(tqpair); 1457 break; 1458 /* Wait for the pdu specific header */ 1459 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH: 1460 pdu = &tqpair->recv_pdu; 1461 rc = nvme_tcp_read_data(tqpair->sock, 1462 pdu->psh_len - pdu->psh_valid_bytes, 1463 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes); 1464 if (rc < 0) { 1465 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1466 break; 1467 } 1468 1469 pdu->psh_valid_bytes += rc; 1470 if (pdu->psh_valid_bytes < pdu->psh_len) { 1471 rc = NVME_TCP_PDU_IN_PROGRESS; 1472 goto out; 1473 } 1474 1475 /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */ 1476 nvme_tcp_pdu_psh_handle(tqpair, reaped); 1477 break; 1478 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD: 1479 pdu = &tqpair->recv_pdu; 1480 /* check whether the data is valid, if not we just return */ 1481 if (!pdu->data_len) { 1482 return NVME_TCP_PDU_IN_PROGRESS; 1483 } 1484 1485 data_len = pdu->data_len; 1486 /* data digest */ 1487 if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) && 1488 tqpair->flags.host_ddgst_enable)) { 1489 data_len += SPDK_NVME_TCP_DIGEST_LEN; 1490 pdu->ddgst_enable = true; 1491 } 1492 1493 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu); 1494 if (rc < 0) { 1495 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR); 1496 break; 1497 } 1498 1499 pdu->readv_offset += rc; 1500 if (pdu->readv_offset < data_len) { 1501 rc = NVME_TCP_PDU_IN_PROGRESS; 1502 goto out; 1503 } 1504 1505 assert(pdu->readv_offset == data_len); 1506 /* All of this PDU has now been read from the socket. */ 1507 nvme_tcp_pdu_payload_handle(tqpair, reaped); 1508 break; 1509 case NVME_TCP_PDU_RECV_STATE_ERROR: 1510 rc = NVME_TCP_PDU_FATAL; 1511 break; 1512 default: 1513 assert(0); 1514 break; 1515 } 1516 } while (prev_state != tqpair->recv_state); 1517 1518 out: 1519 *reaped += tqpair->async_complete; 1520 tqpair->async_complete = 0; 1521 1522 return rc; 1523 } 1524 1525 static void 1526 nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 1527 { 1528 uint64_t t02; 1529 struct nvme_tcp_req *tcp_req, *tmp; 1530 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1531 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1532 struct spdk_nvme_ctrlr_process *active_proc; 1533 1534 /* Don't check timeouts during controller initialization. */ 1535 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 1536 return; 1537 } 1538 1539 if (nvme_qpair_is_admin_queue(qpair)) { 1540 active_proc = nvme_ctrlr_get_current_process(ctrlr); 1541 } else { 1542 active_proc = qpair->active_proc; 1543 } 1544 1545 /* Only check timeouts if the current process has a timeout callback. */ 1546 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 1547 return; 1548 } 1549 1550 t02 = spdk_get_ticks(); 1551 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1552 assert(tcp_req->req != NULL); 1553 1554 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) { 1555 /* 1556 * The requests are in order, so as soon as one has not timed out, 1557 * stop iterating. 1558 */ 1559 break; 1560 } 1561 } 1562 } 1563 1564 static int 1565 nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 1566 { 1567 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1568 uint32_t reaped; 1569 int rc; 1570 1571 rc = spdk_sock_flush(tqpair->sock); 1572 if (rc < 0) { 1573 return rc; 1574 } 1575 1576 if (max_completions == 0) { 1577 max_completions = tqpair->num_entries; 1578 } else { 1579 max_completions = spdk_min(max_completions, tqpair->num_entries); 1580 } 1581 1582 reaped = 0; 1583 do { 1584 rc = nvme_tcp_read_pdu(tqpair, &reaped); 1585 if (rc < 0) { 1586 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n", 1587 errno, spdk_strerror(errno)); 1588 goto fail; 1589 } else if (rc == 0) { 1590 /* Partial PDU is read */ 1591 break; 1592 } 1593 1594 } while (reaped < max_completions); 1595 1596 if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) { 1597 nvme_tcp_qpair_check_timeout(qpair); 1598 } 1599 1600 return reaped; 1601 fail: 1602 1603 /* 1604 * Since admin queues take the ctrlr_lock before entering this function, 1605 * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need 1606 * to call the generic function which will take the lock for us. 1607 */ 1608 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN; 1609 1610 if (nvme_qpair_is_admin_queue(qpair)) { 1611 nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair); 1612 } else { 1613 nvme_ctrlr_disconnect_qpair(qpair); 1614 } 1615 return -ENXIO; 1616 } 1617 1618 static void 1619 nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock) 1620 { 1621 struct spdk_nvme_qpair *qpair = ctx; 1622 struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group); 1623 int32_t num_completions; 1624 1625 num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair); 1626 1627 if (pgroup->num_completions >= 0 && num_completions >= 0) { 1628 pgroup->num_completions += num_completions; 1629 } else { 1630 pgroup->num_completions = -ENXIO; 1631 } 1632 } 1633 1634 static void 1635 dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) 1636 { 1637 } 1638 1639 static int 1640 nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair) 1641 { 1642 struct spdk_nvme_tcp_ic_req *ic_req; 1643 struct nvme_tcp_pdu *pdu; 1644 uint64_t icreq_timeout_tsc; 1645 int rc; 1646 1647 pdu = tqpair->send_pdu; 1648 memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu)); 1649 ic_req = &pdu->hdr.ic_req; 1650 1651 ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ; 1652 ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req); 1653 ic_req->pfv = 0; 1654 ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1; 1655 ic_req->hpda = NVME_TCP_HPDA_DEFAULT; 1656 1657 ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest; 1658 ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest; 1659 1660 nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair); 1661 1662 icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz()); 1663 do { 1664 if (tqpair->qpair.poll_group) { 1665 rc = (int)nvme_tcp_poll_group_process_completions(tqpair->qpair.poll_group, 0, 1666 dummy_disconnected_qpair_cb); 1667 } else { 1668 rc = nvme_tcp_qpair_process_completions(&tqpair->qpair, 0); 1669 } 1670 } while ((tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) && 1671 (rc == 0) && (spdk_get_ticks() <= icreq_timeout_tsc)); 1672 1673 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) { 1674 SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair); 1675 return -1; 1676 } 1677 1678 SPDK_DEBUGLOG(nvme, "Succesfully construct the tqpair=%p via correct icresp\n", tqpair); 1679 1680 return 0; 1681 } 1682 1683 static int 1684 nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 1685 { 1686 struct sockaddr_storage dst_addr; 1687 struct sockaddr_storage src_addr; 1688 int rc; 1689 struct nvme_tcp_qpair *tqpair; 1690 int family; 1691 long int port; 1692 struct spdk_sock_opts opts; 1693 1694 tqpair = nvme_tcp_qpair(qpair); 1695 1696 switch (ctrlr->trid.adrfam) { 1697 case SPDK_NVMF_ADRFAM_IPV4: 1698 family = AF_INET; 1699 break; 1700 case SPDK_NVMF_ADRFAM_IPV6: 1701 family = AF_INET6; 1702 break; 1703 default: 1704 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam); 1705 rc = -1; 1706 return rc; 1707 } 1708 1709 SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family); 1710 1711 memset(&dst_addr, 0, sizeof(dst_addr)); 1712 1713 SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid); 1714 rc = nvme_tcp_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid); 1715 if (rc != 0) { 1716 SPDK_ERRLOG("dst_addr nvme_tcp_parse_addr() failed\n"); 1717 return rc; 1718 } 1719 1720 if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) { 1721 memset(&src_addr, 0, sizeof(src_addr)); 1722 rc = nvme_tcp_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid); 1723 if (rc != 0) { 1724 SPDK_ERRLOG("src_addr nvme_tcp_parse_addr() failed\n"); 1725 return rc; 1726 } 1727 } 1728 1729 port = spdk_strtol(ctrlr->trid.trsvcid, 10); 1730 if (port <= 0 || port >= INT_MAX) { 1731 SPDK_ERRLOG("Invalid port: %s\n", ctrlr->trid.trsvcid); 1732 rc = -1; 1733 return rc; 1734 } 1735 1736 opts.opts_size = sizeof(opts); 1737 spdk_sock_get_default_opts(&opts); 1738 opts.priority = ctrlr->trid.priority; 1739 opts.zcopy = !nvme_qpair_is_admin_queue(qpair) && qpair->poll_group != NULL; 1740 tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts); 1741 if (!tqpair->sock) { 1742 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", 1743 tqpair, ctrlr->trid.traddr, port); 1744 rc = -1; 1745 return rc; 1746 } 1747 1748 if (qpair->poll_group) { 1749 rc = nvme_poll_group_connect_qpair(qpair); 1750 if (rc) { 1751 SPDK_ERRLOG("Unable to activate the tcp qpair.\n"); 1752 return rc; 1753 } 1754 } 1755 1756 tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT; 1757 /* Explicitly set the state and recv_state of tqpair */ 1758 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID; 1759 if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) { 1760 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY); 1761 } 1762 rc = nvme_tcp_qpair_icreq_send(tqpair); 1763 if (rc != 0) { 1764 SPDK_ERRLOG("Unable to connect the tqpair\n"); 1765 return rc; 1766 } 1767 1768 rc = nvme_fabric_qpair_connect(&tqpair->qpair, tqpair->num_entries); 1769 if (rc < 0) { 1770 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); 1771 return rc; 1772 } 1773 1774 return 0; 1775 } 1776 1777 static struct spdk_nvme_qpair * 1778 nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, 1779 uint16_t qid, uint32_t qsize, 1780 enum spdk_nvme_qprio qprio, 1781 uint32_t num_requests) 1782 { 1783 struct nvme_tcp_qpair *tqpair; 1784 struct spdk_nvme_qpair *qpair; 1785 int rc; 1786 1787 tqpair = calloc(1, sizeof(struct nvme_tcp_qpair)); 1788 if (!tqpair) { 1789 SPDK_ERRLOG("failed to get create tqpair\n"); 1790 return NULL; 1791 } 1792 1793 tqpair->num_entries = qsize; 1794 qpair = &tqpair->qpair; 1795 rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests); 1796 if (rc != 0) { 1797 free(tqpair); 1798 return NULL; 1799 } 1800 1801 rc = nvme_tcp_alloc_reqs(tqpair); 1802 if (rc) { 1803 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair); 1804 return NULL; 1805 } 1806 1807 return qpair; 1808 } 1809 1810 static struct spdk_nvme_qpair * 1811 nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, 1812 const struct spdk_nvme_io_qpair_opts *opts) 1813 { 1814 return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio, 1815 opts->io_queue_requests); 1816 } 1817 1818 static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid, 1819 const struct spdk_nvme_ctrlr_opts *opts, 1820 void *devhandle) 1821 { 1822 struct nvme_tcp_ctrlr *tctrlr; 1823 union spdk_nvme_cap_register cap; 1824 union spdk_nvme_vs_register vs; 1825 int rc; 1826 1827 tctrlr = calloc(1, sizeof(*tctrlr)); 1828 if (tctrlr == NULL) { 1829 SPDK_ERRLOG("could not allocate ctrlr\n"); 1830 return NULL; 1831 } 1832 1833 tctrlr->ctrlr.opts = *opts; 1834 tctrlr->ctrlr.trid = *trid; 1835 1836 rc = nvme_ctrlr_construct(&tctrlr->ctrlr); 1837 if (rc != 0) { 1838 free(tctrlr); 1839 return NULL; 1840 } 1841 1842 tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0, 1843 tctrlr->ctrlr.opts.admin_queue_size, 0, 1844 tctrlr->ctrlr.opts.admin_queue_size); 1845 if (!tctrlr->ctrlr.adminq) { 1846 SPDK_ERRLOG("failed to create admin qpair\n"); 1847 nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr); 1848 return NULL; 1849 } 1850 1851 rc = nvme_transport_ctrlr_connect_qpair(&tctrlr->ctrlr, tctrlr->ctrlr.adminq); 1852 if (rc < 0) { 1853 SPDK_ERRLOG("failed to connect admin qpair\n"); 1854 nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr); 1855 return NULL; 1856 } 1857 1858 if (nvme_ctrlr_get_cap(&tctrlr->ctrlr, &cap)) { 1859 SPDK_ERRLOG("get_cap() failed\n"); 1860 nvme_ctrlr_destruct(&tctrlr->ctrlr); 1861 return NULL; 1862 } 1863 1864 if (nvme_ctrlr_get_vs(&tctrlr->ctrlr, &vs)) { 1865 SPDK_ERRLOG("get_vs() failed\n"); 1866 nvme_ctrlr_destruct(&tctrlr->ctrlr); 1867 return NULL; 1868 } 1869 1870 if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) { 1871 SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n"); 1872 nvme_ctrlr_destruct(&tctrlr->ctrlr); 1873 return NULL; 1874 } 1875 1876 nvme_ctrlr_init_cap(&tctrlr->ctrlr, &cap, &vs); 1877 1878 return &tctrlr->ctrlr; 1879 } 1880 1881 static uint32_t 1882 nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) 1883 { 1884 /* TCP transport doens't limit maximum IO transfer size. */ 1885 return UINT32_MAX; 1886 } 1887 1888 static uint16_t 1889 nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) 1890 { 1891 /* 1892 * We do not support >1 SGE in the initiator currently, 1893 * so we can only return 1 here. Once that support is 1894 * added, this should return ctrlr->cdata.nvmf_specific.msdbd 1895 * instead. 1896 */ 1897 return 1; 1898 } 1899 1900 static int 1901 nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 1902 int (*iter_fn)(struct nvme_request *req, void *arg), 1903 void *arg) 1904 { 1905 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1906 struct nvme_tcp_req *tcp_req, *tmp; 1907 int rc; 1908 1909 assert(iter_fn != NULL); 1910 1911 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1912 assert(tcp_req->req != NULL); 1913 1914 rc = iter_fn(tcp_req->req, arg); 1915 if (rc != 0) { 1916 return rc; 1917 } 1918 } 1919 1920 return 0; 1921 } 1922 1923 static void 1924 nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 1925 { 1926 struct nvme_tcp_req *tcp_req, *tmp; 1927 struct spdk_nvme_cpl cpl; 1928 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1929 1930 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 1931 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 1932 1933 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) { 1934 assert(tcp_req->req != NULL); 1935 if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 1936 continue; 1937 } 1938 1939 nvme_tcp_req_complete(tcp_req, &cpl); 1940 nvme_tcp_req_put(tqpair, tcp_req); 1941 } 1942 } 1943 1944 static struct spdk_nvme_transport_poll_group * 1945 nvme_tcp_poll_group_create(void) 1946 { 1947 struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group)); 1948 1949 if (group == NULL) { 1950 SPDK_ERRLOG("Unable to allocate poll group.\n"); 1951 return NULL; 1952 } 1953 1954 group->sock_group = spdk_sock_group_create(group); 1955 if (group->sock_group == NULL) { 1956 free(group); 1957 SPDK_ERRLOG("Unable to allocate sock group.\n"); 1958 return NULL; 1959 } 1960 1961 return &group->group; 1962 } 1963 1964 static int 1965 nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 1966 { 1967 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 1968 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1969 1970 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 1971 return -EPROTO; 1972 } 1973 return 0; 1974 } 1975 1976 static int 1977 nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) 1978 { 1979 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group); 1980 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1981 1982 if (tqpair->sock && group->sock_group) { 1983 if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) { 1984 return -EPROTO; 1985 } 1986 } 1987 return 0; 1988 } 1989 1990 static int 1991 nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, 1992 struct spdk_nvme_qpair *qpair) 1993 { 1994 struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair); 1995 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 1996 1997 /* disconnected qpairs won't have a sock to add. */ 1998 if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) { 1999 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) { 2000 return -EPROTO; 2001 } 2002 } 2003 2004 return 0; 2005 } 2006 2007 static int 2008 nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, 2009 struct spdk_nvme_qpair *qpair) 2010 { 2011 if (qpair->poll_group_tailq_head == &tgroup->connected_qpairs) { 2012 return nvme_poll_group_disconnect_qpair(qpair); 2013 } 2014 2015 return 0; 2016 } 2017 2018 static int64_t 2019 nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, 2020 uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) 2021 { 2022 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2023 struct spdk_nvme_qpair *qpair, *tmp_qpair; 2024 2025 group->completions_per_qpair = completions_per_qpair; 2026 group->num_completions = 0; 2027 2028 spdk_sock_group_poll(group->sock_group); 2029 2030 STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { 2031 disconnected_qpair_cb(qpair, tgroup->group->ctx); 2032 } 2033 2034 return group->num_completions; 2035 } 2036 2037 static int 2038 nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) 2039 { 2040 int rc; 2041 struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup); 2042 2043 if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { 2044 return -EBUSY; 2045 } 2046 2047 rc = spdk_sock_group_close(&group->sock_group); 2048 if (rc != 0) { 2049 SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n"); 2050 assert(false); 2051 } 2052 2053 free(tgroup); 2054 2055 return 0; 2056 } 2057 2058 const struct spdk_nvme_transport_ops tcp_ops = { 2059 .name = "TCP", 2060 .type = SPDK_NVME_TRANSPORT_TCP, 2061 .ctrlr_construct = nvme_tcp_ctrlr_construct, 2062 .ctrlr_scan = nvme_fabric_ctrlr_scan, 2063 .ctrlr_destruct = nvme_tcp_ctrlr_destruct, 2064 .ctrlr_enable = nvme_tcp_ctrlr_enable, 2065 2066 .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4, 2067 .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8, 2068 .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4, 2069 .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8, 2070 2071 .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size, 2072 .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges, 2073 2074 .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair, 2075 .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair, 2076 .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair, 2077 .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair, 2078 2079 .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs, 2080 .qpair_reset = nvme_tcp_qpair_reset, 2081 .qpair_submit_request = nvme_tcp_qpair_submit_request, 2082 .qpair_process_completions = nvme_tcp_qpair_process_completions, 2083 .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests, 2084 .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers, 2085 2086 .poll_group_create = nvme_tcp_poll_group_create, 2087 .poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair, 2088 .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair, 2089 .poll_group_add = nvme_tcp_poll_group_add, 2090 .poll_group_remove = nvme_tcp_poll_group_remove, 2091 .poll_group_process_completions = nvme_tcp_poll_group_process_completions, 2092 .poll_group_destroy = nvme_tcp_poll_group_destroy, 2093 }; 2094 2095 SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops); 2096