/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
/**
 * Allocate TX queue elements.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
txq_alloc_elts(struct txq *txq, unsigned int elts_n)
{
	unsigned int i;
	struct txq_elt (*elts)[elts_n] =
		rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket);
	linear_t (*elts_linear)[elts_n] =
		rte_calloc_socket("TXQ", 1, sizeof(*elts_linear), 0,
				  txq->socket);
	struct ibv_mr *mr_linear = NULL;
	int ret = 0;

	if ((elts == NULL) || (elts_linear == NULL)) {
		ERROR("%p: can't allocate packets array", (void *)txq);
		ret = ENOMEM;
		goto error;
	}
	mr_linear =
		ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),
			   (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
	if (mr_linear == NULL) {
		ERROR("%p: unable to configure MR, ibv_reg_mr() failed",
		      (void *)txq);
		ret = EINVAL;
		goto error;
	}
	for (i = 0; (i != elts_n); ++i) {
		struct txq_elt *elt = &(*elts)[i];

		elt->buf = NULL;
	}
	DEBUG("%p: allocated and configured %u WRs", (void *)txq, elts_n);
	txq->elts_n = elts_n;
	txq->elts = elts;
	txq->elts_head = 0;
	txq->elts_tail = 0;
	txq->elts_comp = 0;
	/* Request send completion every MLX5_PMD_TX_PER_COMP_REQ packets or
	 * at least 4 times per ring. */
	txq->elts_comp_cd_init =
		((MLX5_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
		 MLX5_PMD_TX_PER_COMP_REQ : (elts_n / 4));
	txq->elts_comp_cd = txq->elts_comp_cd_init;
	txq->elts_linear = elts_linear;
	txq->mr_linear = mr_linear;
	assert(ret == 0);
	return 0;
error:
	if (mr_linear != NULL)
		claim_zero(ibv_dereg_mr(mr_linear));

	rte_free(elts_linear);
	rte_free(elts);

	DEBUG("%p: failed, freed everything", (void *)txq);
	assert(ret > 0);
	return ret;
}

/**
 * Free TX queue elements.
 *
 * @param txq
 *   Pointer to TX queue structure.
 */
static void
txq_free_elts(struct txq *txq)
{
	unsigned int i;
	unsigned int elts_n = txq->elts_n;
	struct txq_elt (*elts)[elts_n] = txq->elts;
	linear_t (*elts_linear)[elts_n] = txq->elts_linear;
	struct ibv_mr *mr_linear = txq->mr_linear;

	DEBUG("%p: freeing WRs", (void *)txq);
	txq->elts_n = 0;
	txq->elts = NULL;
	txq->elts_linear = NULL;
	txq->mr_linear = NULL;
	if (mr_linear != NULL)
		claim_zero(ibv_dereg_mr(mr_linear));

	rte_free(elts_linear);
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct txq_elt *elt = &(*elts)[i];

		if (elt->buf == NULL)
			continue;
		rte_pktmbuf_free(elt->buf);
	}
	rte_free(elts);
}
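/*
 * Note: txq_alloc_elts() primes elts_comp_cd with elts_comp_cd_init so that
 * the transmit path only requests a hardware completion every N packets
 * rather than on each one. A worked example of the initialization above,
 * assuming MLX5_PMD_TX_PER_COMP_REQ is 64 (a typical default from
 * mlx5_defs.h):
 *
 *   elts_n = 512: 512 / 4 = 128; 64 < 128, so a completion is requested
 *                 every 64 packets.
 *   elts_n = 128: 128 / 4 = 32; 64 >= 32, so a completion is requested
 *                 every 32 packets, i.e. at least 4 times per ring.
 */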
/**
 * Clean up a TX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param txq
 *   Pointer to TX queue structure.
 */
void
txq_cleanup(struct txq *txq)
{
	struct ibv_exp_release_intf_params params;
	size_t i;

	DEBUG("cleaning up %p", (void *)txq);
	txq_free_elts(txq);
	if (txq->if_qp != NULL) {
		assert(txq->priv != NULL);
		assert(txq->priv->ctx != NULL);
		assert(txq->qp != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(txq->priv->ctx,
						txq->if_qp,
						&params));
	}
	if (txq->if_cq != NULL) {
		assert(txq->priv != NULL);
		assert(txq->priv->ctx != NULL);
		assert(txq->cq != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(txq->priv->ctx,
						txq->if_cq,
						&params));
	}
	if (txq->qp != NULL)
		claim_zero(ibv_destroy_qp(txq->qp));
	if (txq->cq != NULL)
		claim_zero(ibv_destroy_cq(txq->cq));
	if (txq->rd != NULL) {
		struct ibv_exp_destroy_res_domain_attr attr = {
			.comp_mask = 0,
		};

		assert(txq->priv != NULL);
		assert(txq->priv->ctx != NULL);
		claim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx,
						      txq->rd,
						      &attr));
	}
	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
		if (txq->mp2mr[i].mp == NULL)
			break;
		assert(txq->mp2mr[i].mr != NULL);
		claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
	}
	memset(txq, 0, sizeof(*txq));
}
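/*
 * Note: teardown in txq_cleanup() mirrors the creation order in reverse:
 * burst interfaces are released before the QP and CQ they wrap, which are in
 * turn destroyed before the resource domain they were created in. Verbs
 * refuses to destroy an object that still has dependents, so claim_zero()
 * (an assert() wrapper in debug builds) would catch any ordering mistake.
 */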
/**
 * Configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param txq
 *   Pointer to TX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
	  unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct priv *priv = dev->data->dev_private;
	struct txq tmpl = {
		.priv = priv,
		.socket = socket
	};
	union {
		struct ibv_exp_query_intf_params params;
		struct ibv_exp_qp_init_attr init;
		struct ibv_exp_res_domain_init_attr rd;
		struct ibv_exp_cq_init_attr cq;
		struct ibv_exp_qp_attr mod;
	} attr;
	enum ibv_exp_query_intf_status status;
	int ret = 0;

	(void)conf; /* Thresholds configuration (ignored). */
	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
		ERROR("%p: invalid number of TX descriptors (must be a"
		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
		return EINVAL;
	}
	desc /= MLX5_PMD_SGE_WR_N;
	/* MRs will be registered in mp2mr[] later. */
	attr.rd = (struct ibv_exp_res_domain_init_attr){
		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
		.thread_model = IBV_EXP_THREAD_SINGLE,
		.msg_model = IBV_EXP_MSG_HIGH_BW,
	};
	tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
	if (tmpl.rd == NULL) {
		ret = ENOMEM;
		ERROR("%p: RD creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.cq = (struct ibv_exp_cq_init_attr){
		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
		.res_domain = tmpl.rd,
	};
	tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
	if (tmpl.cq == NULL) {
		ret = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
	attr.init = (struct ibv_exp_qp_init_attr){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
					priv->device_attr.max_qp_wr :
					desc),
			/* Max number of scatter/gather elements in a WR. */
			.max_send_sge = ((priv->device_attr.max_sge <
					  MLX5_PMD_SGE_WR_N) ?
					 priv->device_attr.max_sge :
					 MLX5_PMD_SGE_WR_N),
#if MLX5_PMD_MAX_INLINE > 0
			.max_inline_data = MLX5_PMD_MAX_INLINE,
#endif
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/* Do *NOT* enable this, completion events are managed per
		 * TX burst. */
		.sq_sig_all = 0,
		.pd = priv->pd,
		.res_domain = tmpl.rd,
		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
	};
	tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
	if (tmpl.qp == NULL) {
		ret = (errno ? errno : EINVAL);
		ERROR("%p: QP creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
#if MLX5_PMD_MAX_INLINE > 0
	/* ibv_exp_create_qp() updates this value. */
	tmpl.max_inline = attr.init.cap.max_inline_data;
#endif
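	/*
	 * Note: a freshly created QP starts in the RESET state; it must be
	 * walked through INIT, RTR and RTS (below) before it can post send
	 * requests. RTR (ready to receive) is a receive-side state, but the
	 * transition is still mandatory even for this send-only queue.
	 */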
	attr.mod = (struct ibv_exp_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* Primary port number. */
		.port_num = priv->port
	};
	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
				(IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
	if (ret) {
		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	ret = txq_alloc_elts(&tmpl, desc);
	if (ret) {
		ERROR("%p: TXQ allocation failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.mod = (struct ibv_exp_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
	if (ret) {
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
	if (ret) {
		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_CQ,
		.obj = tmpl.cq,
	};
	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_cq == NULL) {
		ret = EINVAL;
		ERROR("%p: CQ interface family query failed with status %d",
		      (void *)dev, status);
		goto error;
	}
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_QP_BURST,
		.obj = tmpl.qp,
	};
	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_qp == NULL) {
		ret = EINVAL;
		ERROR("%p: QP interface family query failed with status %d",
		      (void *)dev, status);
		goto error;
	}
	/* Clean up txq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
	txq_cleanup(txq);
	*txq = tmpl;
	DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl);
	assert(ret == 0);
	return 0;
error:
	txq_cleanup(&tmpl);
	assert(ret > 0);
	return ret;
}
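/*
 * Note: the if_cq/if_qp "interface families" queried above expose direct
 * function pointers that the data path can call instead of going through the
 * generic ibv_post_send()/ibv_poll_cq() entry points. A minimal sketch of
 * how the TX burst path might use them, assuming the member names of the
 * experimental verbs API (the real code lives in mlx5_rxtx.c):
 *
 *	struct ibv_exp_qp_burst_family *if_qp = txq->if_qp;
 *
 *	err = if_qp->send_pending(txq->qp, addr, length, lkey, send_flags);
 *	...
 *	if_qp->send_flush(txq->qp);
 */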
/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct priv *priv = dev->data->dev_private;
	struct txq *txq = (*priv->txqs)[idx];
	int ret;

	priv_lock(priv);
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->txqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->txqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	if (txq != NULL) {
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)txq);
		if (priv->started) {
			priv_unlock(priv);
			return -EEXIST;
		}
		(*priv->txqs)[idx] = NULL;
		txq_cleanup(txq);
	} else {
		txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0, socket);
		if (txq == NULL) {
			ERROR("%p: unable to allocate queue index %u",
			      (void *)dev, idx);
			priv_unlock(priv);
			return -ENOMEM;
		}
	}
	ret = txq_setup(dev, txq, desc, socket, conf);
	if (ret)
		rte_free(txq);
	else {
		txq->stats.idx = idx;
		DEBUG("%p: adding TX queue %p to list",
		      (void *)dev, (void *)txq);
		(*priv->txqs)[idx] = txq;
		/* Update send callback. */
		dev->tx_pkt_burst = mlx5_tx_burst;
	}
	priv_unlock(priv);
	return -ret;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dpdk_txq
 *   Generic TX queue pointer.
 */
void
mlx5_tx_queue_release(void *dpdk_txq)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	struct priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	priv = txq->priv;
	priv_lock(priv);
	for (i = 0; (i != priv->txqs_n); ++i)
		if ((*priv->txqs)[i] == txq) {
			DEBUG("%p: removing TX queue %p from list",
			      (void *)priv->dev, (void *)txq);
			(*priv->txqs)[i] = NULL;
			break;
		}
	txq_cleanup(txq);
	rte_free(txq);
	priv_unlock(priv);
}
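/*
 * Application-side usage sketch (hypothetical values): mlx5_tx_queue_setup()
 * is not called directly but through the generic ethdev API, which passes
 * the negative errno returned above back to the caller, e.g.:
 *
 *	int ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *					 NULL);
 *	if (ret < 0)
 *		rte_exit(EXIT_FAILURE, "TX queue setup failed: %s\n",
 *			 strerror(-ret));
 *
 * Note that desc (512 here) must be a multiple of MLX5_PMD_SGE_WR_N, as
 * enforced by txq_setup().
 */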