/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <fcntl.h>
#include <stdint.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_common_os.h"

static const char * const mlx5_txpp_stat_names[] = {
	"txpp_err_miss_int", /* Missed service interrupt. */
	"txpp_err_rearm_queue", /* Rearm Queue errors. */
	"txpp_err_clock_queue", /* Clock Queue errors. */
	"txpp_err_ts_past", /* Timestamp in the past. */
	"txpp_err_ts_future", /* Timestamp in the distant future. */
	"txpp_jitter", /* Timestamp jitter (one Clock Queue completion). */
	"txpp_wander", /* Timestamp wander (half of Clock Queue completions). */
	"txpp_sync_lost", /* Scheduling synchronization lost. */
};

/* Destroy Event Queue Notification Channel. */
static void
mlx5_txpp_destroy_eqn(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.echan) {
		mlx5_glue->devx_destroy_event_channel(sh->txpp.echan);
		sh->txpp.echan = NULL;
	}
	sh->txpp.eqn = 0;
}

/* Create Event Queue Notification Channel. */
static int
mlx5_txpp_create_eqn(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t lcore;

	MLX5_ASSERT(!sh->txpp.echan);
	lcore = (uint32_t)rte_lcore_to_cpu_id(-1);
	if (mlx5_glue->devx_query_eqn(sh->ctx, lcore, &sh->txpp.eqn)) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno);
		sh->txpp.eqn = 0;
		return -rte_errno;
	}
	sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx,
			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!sh->txpp.echan) {
		sh->txpp.eqn = 0;
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create event channel %d.", rte_errno);
		return -rte_errno;
	}
	return 0;
}

static void
mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.pp) {
		mlx5_glue->dv_free_pp(sh->txpp.pp);
		sh->txpp.pp = NULL;
		sh->txpp.pp_id = 0;
	}
}

/* Allocate Packet Pacing index from kernel via mlx5dv call. */
static int
mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh)
{
#ifdef HAVE_MLX5DV_PP_ALLOC
	uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)];
	uint64_t rate;

	MLX5_ASSERT(!sh->txpp.pp);
	memset(&pp, 0, sizeof(pp));
	rate = NS_PER_S / sh->txpp.tick;
	if (rate * sh->txpp.tick != NS_PER_S)
		DRV_LOG(WARNING, "Packet pacing frequency is not precise.");
	if (sh->txpp.test) {
		uint32_t len;

		len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE,
			      (size_t)RTE_ETHER_MIN_LEN);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 burst_upper_bound, len);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 typical_packet_size, len);
		/* Convert packets per second into kilobits per second. */
		rate = (rate * len) / (1000ul / CHAR_BIT);
		DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate);
	}
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate);
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
		 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE);
	sh->txpp.pp = mlx5_glue->dv_alloc_pp
				(sh->ctx, sizeof(pp), &pp,
				 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX);
	if (sh->txpp.pp == NULL) {
		DRV_LOG(ERR, "Failed to allocate packet pacing index.");
		rte_errno = errno;
		return -errno;
	}
	if (!sh->txpp.pp->index) {
		DRV_LOG(ERR, "Zero packet pacing index allocated.");
		mlx5_txpp_free_pp_index(sh);
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	sh->txpp.pp_id = sh->txpp.pp->index;
	return 0;
#else
	RTE_SET_USED(sh);
	DRV_LOG(ERR, "Allocating pacing index is not supported.");
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}

static void
mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq)
{
	if (wq->sq)
		claim_zero(mlx5_devx_cmd_destroy(wq->sq));
	if (wq->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem));
	if (wq->sq_buf)
		mlx5_free((void *)(uintptr_t)wq->sq_buf);
	if (wq->cq)
		claim_zero(mlx5_devx_cmd_destroy(wq->cq));
	if (wq->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem));
	if (wq->cq_buf)
		mlx5_free((void *)(uintptr_t)wq->cq_buf);
	memset(wq, 0, sizeof(*wq));
}

static void
mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;

	mlx5_txpp_destroy_send_queue(wq);
}

static void
mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;

	mlx5_txpp_destroy_send_queue(wq);
	if (sh->txpp.tsa) {
		mlx5_free(sh->txpp.tsa);
		sh->txpp.tsa = NULL;
	}
}

static void
mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	union {
		uint32_t w32[2];
		uint64_t w64;
	} cs;

	wq->sq_ci = ci + 1;
	cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
			(wq->wqes[ci & (wq->sq_size - 1)].ctrl[0]) |
			(ci - 1) << 8);
	cs.w32[1] = wq->wqes[ci & (wq->sq_size - 1)].ctrl[1];
	/* Update SQ doorbell record with new SQ ci. */
	rte_compiler_barrier();
	*wq->sq_dbrec = rte_cpu_to_be_32(wq->sq_ci);
	/* Make sure the doorbell record is updated. */
	rte_wmb();
	/* Write to doorbell register to start processing. */
	__mlx5_uar_write64_relaxed(cs.w64, sh->tx_uar->reg_addr, NULL);
	rte_wmb();
}

static void
mlx5_txpp_fill_cqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	uint32_t i;

	for (i = 0; i < MLX5_TXPP_REARM_CQ_SIZE; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
}

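/*
 * Descriptive note (derived from the WQE construction below): the Rearm
 * Queue is built of SEND_EN/WAIT WQE pairs. The SEND_EN segment points to
 * the Clock Queue SQ and re-enables a batch of its WQEs, while the WAIT
 * segment blocks on the Clock Queue CQ index, so the Rearm Queue paces
 * itself by the Clock Queue completions.
 */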
static void
mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	uint32_t i;

	for (i = 0; i < wq->sq_size; i += 2) {
		struct mlx5_wqe_cseg *cs;
		struct mlx5_wqe_qseg *qs;
		uint32_t index;

		/* Build SEND_EN request with slave WQE index. */
		cs = &wqe[i + 0].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ALWAYS <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) &
			((1 << MLX5_WQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.sq->id);
		/* Build WAIT request with slave CQE index. */
		cs = &wqe[i + 1].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) &
			((1 << MLX5_CQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.cq->id);
	}
}

/* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */
static int
mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_REARM_CQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create completion queue object for Rearm Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = sh->tx_uar->page_id;
	cq_attr.eqn = sh->txpp.eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_REARM_CQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Rearm Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	wq->arm_sn = 0;
	/* Mark all CQEs initially as invalid. */
	mlx5_txpp_fill_cqe_rearm_queue(sh);
	/*
	 * Allocate memory buffer for Send Queue WQEs.
	 * There should be no WQE leftovers in the cyclic queue.
	 */
	wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE;
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create send queue object for Rearm Queue. */
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.cqn = wq->cq->id;
	sq_attr.cd_master = 1;
	sq_attr.wq_attr.uar_page = sh->tx_uar->page_id;
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Rearm Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before moving to Ready state. */
	mlx5_txpp_fill_wqe_rearm_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state for Rearm Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_rearm_queue(sh);
	rte_errno = -ret;
	return ret;
}

static void
mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	struct mlx5_wqe_cseg *cs = &wqe->cseg;
	uint32_t wqe_size, opcode, i;
	uint8_t *dst;

	/* For test purposes fill the WQ with an inline SEND packet. */
	if (sh->txpp.test) {
		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
				     MLX5_WQE_CSEG_SIZE +
				     2 * MLX5_WQE_ESEG_SIZE -
				     MLX5_ESEG_MIN_INLINE_SIZE,
				     MLX5_WSEG_SIZE);
		opcode = MLX5_OPCODE_SEND;
	} else {
		wqe_size = MLX5_WSEG_SIZE;
		opcode = MLX5_OPCODE_NOP;
	}
	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
	cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) |
				     (wqe_size / MLX5_WSEG_SIZE));
	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE);
	if (sh->txpp.test) {
		struct mlx5_wqe_eseg *es = &wqe->eseg;
		struct rte_ether_hdr *eth_hdr;
		struct rte_ipv4_hdr *ip_hdr;
		struct rte_udp_hdr *udp_hdr;

		/* Build the inline test packet pattern. */
		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
			    (sizeof(struct rte_ether_hdr) +
			     sizeof(struct rte_ipv4_hdr)));
		es->flags = 0;
		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		es->swp_offs = 0;
		es->metadata = 0;
		es->swp_flags = 0;
		es->mss = 0;
		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
		/* Build test packet L2 header (Ethernet). */
		dst = (uint8_t *)&es->inline_data;
		eth_hdr = (struct rte_ether_hdr *)dst;
		rte_eth_random_addr(&eth_hdr->d_addr.addr_bytes[0]);
		rte_eth_random_addr(&eth_hdr->s_addr.addr_bytes[0]);
		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		/* Build test packet L3 header (IP v4). */
		dst += sizeof(struct rte_ether_hdr);
		ip_hdr = (struct rte_ipv4_hdr *)dst;
		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
		ip_hdr->type_of_service = 0;
		ip_hdr->fragment_offset = 0;
		ip_hdr->time_to_live = 64;
		ip_hdr->next_proto_id = IPPROTO_UDP;
		ip_hdr->packet_id = 0;
		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
						sizeof(struct rte_ether_hdr));
		/* Use RFC5735 / RFC2544 reserved network test addresses. */
		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 1);
		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 2);
		if (MLX5_TXPP_TEST_PKT_SIZE <
		    (sizeof(struct rte_ether_hdr) +
		     sizeof(struct rte_ipv4_hdr) +
		     sizeof(struct rte_udp_hdr)))
			goto wcopy;
		/* Build test packet L4 header (UDP). */
		dst += sizeof(struct rte_ipv4_hdr);
		udp_hdr = (struct rte_udp_hdr *)dst;
		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
		udp_hdr->dst_port = RTE_BE16(9);
		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
					      sizeof(struct rte_ether_hdr) -
					      sizeof(struct rte_ipv4_hdr));
		udp_hdr->dgram_cksum = 0;
		/* Fill the test packet data. */
		dst += sizeof(struct rte_udp_hdr);
		for (i = sizeof(struct rte_ether_hdr) +
			 sizeof(struct rte_ipv4_hdr) +
			 sizeof(struct rte_udp_hdr);
		     i < MLX5_TXPP_TEST_PKT_SIZE; i++)
			*dst++ = (uint8_t)(i & 0xFF);
	}
wcopy:
	/* Duplicate the pattern to the next WQEs. */
	dst = (uint8_t *)(uintptr_t)wq->sq_buf;
	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
		dst += wqe_size;
		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size);
	}
}

/* Creates the Clock Queue for packet pacing, returns zero on success. */
static int
mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				   MLX5_TXPP_REARM_SQ_SIZE *
				   sizeof(struct mlx5_txpp_ts),
				   0, sh->numa_node);
	if (!sh->txpp.tsa) {
		DRV_LOG(ERR, "Failed to allocate memory for CQ stats.");
		return -ENOMEM;
	}
	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create completion queue object for Clock Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.use_first_only = 1;
	cq_attr.overrun_ignore = 1;
	cq_attr.uar_page_id = sh->tx_uar->page_id;
	cq_attr.eqn = sh->txpp.eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = wq->cq_umem->umem_id;
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = wq->cq_umem->umem_id;
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	/* Allocate memory buffer for Send Queue WQEs. */
	if (sh->txpp.test) {
		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
					MLX5_WQE_CSEG_SIZE +
					2 * MLX5_WQE_ESEG_SIZE -
					MLX5_ESEG_MIN_INLINE_SIZE,
					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
	} else {
		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
	}
	/* There should not be WQE leftovers in the cyclic queue. */
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create send queue object for Clock Queue. */
	if (sh->txpp.test) {
		sq_attr.tis_lst_sz = 1;
		sq_attr.tis_num = sh->tis->id;
		sq_attr.non_wire = 0;
		sq_attr.static_sq_wq = 1;
	} else {
		sq_attr.non_wire = 1;
		sq_attr.static_sq_wq = 1;
	}
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = wq->cq->id;
	sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
	sq_attr.wq_attr.cd_slave = 1;
	sq_attr.wq_attr.uar_page = sh->tx_uar->page_id;
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = wq->sq_umem->umem_id;
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = wq->sq_umem->umem_id;
	/* umem_offset must be zero for static_sq_wq queue. */
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before moving to Ready state. */
	mlx5_txpp_fill_wqe_clock_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	wq->sq_ci = 0;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state for Clock Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_clock_queue(sh);
	rte_errno = -ret;
	return ret;
}

/* Enable notification from the Rearm Queue CQ. */
static inline void
mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
	uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
	uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
	uint64_t db_be = rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq->id);
	uint32_t *addr = RTE_PTR_ADD(sh->tx_uar->base_addr, MLX5_CQ_DOORBELL);

	rte_compiler_barrier();
	aq->cq_dbrec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi);
	rte_wmb();
#ifdef RTE_ARCH_64
	*(uint64_t *)addr = db_be;
#else
	*(uint32_t *)addr = db_be;
	rte_io_wmb();
	*((uint32_t *)addr + 1) = db_be >> 32;
#endif
	aq->arm_sn++;
}

static inline void
mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts)
{
	/*
	 * The only CQE of the Clock Queue is being continuously
	 * updated by the hardware at the specified rate. We have to
	 * read the timestamp and the WQE completion index atomically.
	 */
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
	rte_int128_t src;

	memset(&src, 0, sizeof(src));
	*ts = src;
	/* if (*from == *ts) *from = *src else *ts = *from; */
	rte_atomic128_cmp_exchange(from, ts, &src, 0,
				   __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#else
	rte_atomic64_t *cqe = (rte_atomic64_t *)from;

	/* Power architecture does not support 16B compare-and-swap. */
	for (;;) {
		int64_t tm, op;
		int64_t *ps;

		rte_compiler_barrier();
		tm = rte_atomic64_read(cqe + 0);
		op = rte_atomic64_read(cqe + 1);
		rte_compiler_barrier();
		if (tm != rte_atomic64_read(cqe + 0))
			continue;
		if (op != rte_atomic64_read(cqe + 1))
			continue;
		ps = (int64_t *)ts;
		ps[0] = tm;
		ps[1] = op;
		return;
	}
#endif
}

/* Stores timestamp in the cache structure to share data with datapath. */
static inline void
mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh,
			  uint64_t ts, uint64_t ci)
{
	/* Pack the completion index into the upper bits of the cached value. */
	ci = ci << (64 - MLX5_CQ_INDEX_WIDTH);
	ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH;
	rte_compiler_barrier();
	rte_atomic64_set(&sh->txpp.ts.ts, ts);
	rte_atomic64_set(&sh->txpp.ts.ci_ts, ci);
	rte_wmb();
}

/* Reads timestamp from Clock Queue CQE and stores in the cache. */
static inline void
mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	union {
		rte_int128_t u128;
		struct mlx5_cqe_ts cts;
	} to;
	uint64_t ts;
	uint16_t ci;

	static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t),
		      "Wrong timestamp CQE part size");
	mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
	if (to.cts.op_own >> 4) {
		DRV_LOG(DEBUG, "Clock Queue error sync lost.");
		rte_atomic32_inc(&sh->txpp.err_clock_queue);
		sh->txpp.sync_lost = 1;
		return;
	}
	ci = rte_be_to_cpu_16(to.cts.wqe_counter);
	ts = rte_be_to_cpu_64(to.cts.timestamp);
	ts = mlx5_txpp_convert_rx_ts(sh, ts);
	wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX;
	wq->sq_ci = ci;
	mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci);
}

/* Waits for the first completion on Clock Queue to init timestamp. */
static inline void
mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	uint32_t wait;

	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) {
		struct timespec onems;

		mlx5_txpp_update_timestamp(sh);
		if (wq->sq_ci)
			return;
		/* Wait one millisecond and try again. */
		onems.tv_sec = 0;
		onems.tv_nsec = NS_PER_S / MS_PER_S;
		nanosleep(&onems, 0);
	}
	DRV_LOG(ERR, "Unable to initialize timestamp.");
	sh->txpp.sync_lost = 1;
}

#ifdef HAVE_IBV_DEVX_EVENT
/* Gather statistics for timestamp from Clock Queue CQE. */
static inline void
mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	/* Check whether we have a valid timestamp. */
	if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n)
		return;
	MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE);
	sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts;
	if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE)
		sh->txpp.ts_p = 0;
	if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE)
		++sh->txpp.ts_n;
}

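/*
 * Note: the snapshots gathered above go to the sh->txpp.tsa circular
 * array (MLX5_TXPP_REARM_SQ_SIZE entries, ts_p is the write index) and
 * are consumed by mlx5_txpp_xstats_jitter()/mlx5_txpp_xstats_wander()
 * below to report the clock drift statistics.
 */
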
/* Handles Rearm Queue completions in the periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	uint32_t cq_ci = wq->cq_ci;
	bool error = false;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &wq->cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)];
		ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci);
		switch (ret) {
		case MLX5_CQE_STATUS_ERR:
			error = true;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_SW_OWN:
			wq->sq_ci += 2;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_HW_OWN:
			break;
		default:
			MLX5_ASSERT(false);
			break;
		}
	} while (ret != MLX5_CQE_STATUS_HW_OWN);
	if (likely(cq_ci != wq->cq_ci)) {
		/* Check whether we have missed interrupts. */
		if (cq_ci - wq->cq_ci != 1) {
			DRV_LOG(DEBUG, "Rearm Queue missed interrupt.");
			rte_atomic32_inc(&sh->txpp.err_miss_int);
			/* Check sync lost on wqe index. */
			if (cq_ci - wq->cq_ci >=
			    (((1UL << MLX5_WQ_INDEX_WIDTH) /
			      MLX5_TXPP_REARM) - 1))
				error = true;
		}
		/* Update doorbell record to notify hardware. */
		rte_compiler_barrier();
		*wq->cq_dbrec = rte_cpu_to_be_32(cq_ci);
		rte_wmb();
		wq->cq_ci = cq_ci;
		/* Fire new requests to Rearm Queue. */
		if (error) {
			DRV_LOG(DEBUG, "Rearm Queue error sync lost.");
			rte_atomic32_inc(&sh->txpp.err_rearm_queue);
			sh->txpp.sync_lost = 1;
		}
	}
}

/* Handles Clock Queue completions in the periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_update_timestamp(sh);
	mlx5_txpp_gather_timestamp(sh);
}
#endif

/* Invoked periodically on Rearm Queue completions. */
void
mlx5_txpp_interrupt_handler(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_EVENT
	RTE_SET_USED(cb_arg);
	return;
#else
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Process events in the loop. Only rearm completions are expected. */
	while (mlx5_glue->devx_get_event
			(sh->txpp.echan,
			 &out.event_resp,
			 sizeof(out.buf)) >=
			 (ssize_t)sizeof(out.event_resp.cookie)) {
		mlx5_txpp_handle_rearm_queue(sh);
		mlx5_txpp_handle_clock_queue(sh);
		mlx5_txpp_cq_arm(sh);
		mlx5_txpp_doorbell_rearm_queue
				(sh, sh->txpp.rearm_queue.sq_ci - 1);
	}
#endif /* HAVE_IBV_DEVX_EVENT */
}

static void
mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->txpp.intr_handle.fd)
		return;
	mlx5_intr_callback_unregister(&sh->txpp.intr_handle,
				      mlx5_txpp_interrupt_handler, sh);
	sh->txpp.intr_handle.fd = 0;
}

/* Attaches the interrupt handler and fires the first request to Rearm Queue. */
static int
mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh)
{
	uint16_t event_nums[1] = {0};
	int flags;
	int ret;

	rte_atomic32_set(&sh->txpp.err_miss_int, 0);
	rte_atomic32_set(&sh->txpp.err_rearm_queue, 0);
	rte_atomic32_set(&sh->txpp.err_clock_queue, 0);
	rte_atomic32_set(&sh->txpp.err_ts_past, 0);
	rte_atomic32_set(&sh->txpp.err_ts_future, 0);
	/* Attach interrupt handler to process Rearm Queue completions. */
	flags = fcntl(sh->txpp.echan->fd, F_GETFL);
	ret = fcntl(sh->txpp.echan->fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(ERR, "Failed to change event channel FD.");
		rte_errno = errno;
		return -rte_errno;
	}
	memset(&sh->txpp.intr_handle, 0, sizeof(sh->txpp.intr_handle));
	sh->txpp.intr_handle.fd = sh->txpp.echan->fd;
	sh->txpp.intr_handle.type = RTE_INTR_HANDLE_EXT;
	if (rte_intr_callback_register(&sh->txpp.intr_handle,
				       mlx5_txpp_interrupt_handler, sh)) {
		sh->txpp.intr_handle.fd = 0;
		DRV_LOG(ERR, "Failed to register CQE interrupt %d.",
			rte_errno);
		return -rte_errno;
	}
	/* Subscribe CQ event to the event channel controlled by the driver. */
	ret = mlx5_glue->devx_subscribe_devx_event(sh->txpp.echan,
						   sh->txpp.rearm_queue.cq->obj,
						   sizeof(event_nums),
						   event_nums, 0);
	if (ret) {
		DRV_LOG(ERR, "Failed to subscribe CQE event.");
		rte_errno = errno;
		return -errno;
	}
	/* Enable interrupts in the CQ. */
	mlx5_txpp_cq_arm(sh);
	/* Fire the first request on Rearm Queue. */
	mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1);
	mlx5_txpp_init_timestamp(sh);
	return 0;
}

/*
 * The routine initializes the packet pacing infrastructure:
 * - allocates PP context
 * - creates Clock CQ/SQ
 * - creates Rearm CQ/SQ
 * - attaches rearm interrupt handler
 * - starts Clock Queue
 *
 * Returns 0 on success, negative otherwise.
 */
static int
mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv)
{
	int tx_pp = priv->config.tx_pp;
	int ret;

	/* Store the requested pacing parameters. */
	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
	sh->txpp.test = !!(tx_pp < 0);
	sh->txpp.skew = priv->config.tx_skew;
	sh->txpp.freq = priv->config.hca_attr.dev_freq_khz;
	ret = mlx5_txpp_create_eqn(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_alloc_pp_index(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_clock_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_rearm_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_start_service(sh);
	if (ret)
		goto exit;
exit:
	if (ret) {
		mlx5_txpp_stop_service(sh);
		mlx5_txpp_destroy_rearm_queue(sh);
		mlx5_txpp_destroy_clock_queue(sh);
		mlx5_txpp_free_pp_index(sh);
		mlx5_txpp_destroy_eqn(sh);
		sh->txpp.tick = 0;
		sh->txpp.test = 0;
		sh->txpp.skew = 0;
	}
	return ret;
}

/*
 * The routine destroys the packet pacing infrastructure:
 * - detaches rearm interrupt handler
 * - destroys Rearm CQ/SQ
 * - destroys Clock CQ/SQ
 * - frees PP context
 */
static void
mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_stop_service(sh);
	mlx5_txpp_destroy_rearm_queue(sh);
	mlx5_txpp_destroy_clock_queue(sh);
	mlx5_txpp_free_pp_index(sh);
	mlx5_txpp_destroy_eqn(sh);
	sh->txpp.tick = 0;
	sh->txpp.test = 0;
	sh->txpp.skew = 0;
}

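/*
 * Note: the pacing parameters come from the tx_pp/tx_skew devargs stored
 * in priv->config (see mlx5_txpp_create() above). For example (values are
 * illustrative only), tx_pp=500 would request a 500 ns scheduling tick,
 * while a negative tx_pp=-500 would also enable the test mode with inline
 * SEND packets on the Clock Queue instead of NOPs.
 */
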
/**
 * Creates and starts packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_txpp_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int err = 0;
	int ret;

	if (!priv->config.tx_pp) {
		/* Packet pacing is not requested for the device. */
		MLX5_ASSERT(priv->txpp_en == 0);
		return 0;
	}
	if (priv->txpp_en) {
		/* Packet pacing is already enabled for the device. */
		MLX5_ASSERT(sh->txpp.refcnt);
		return 0;
	}
	if (priv->config.tx_pp > 0) {
		ret = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
		if (ret < 0)
			return 0;
	}
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	if (sh->txpp.refcnt) {
		priv->txpp_en = 1;
		++sh->txpp.refcnt;
	} else {
		err = mlx5_txpp_create(sh, priv);
		if (!err) {
			MLX5_ASSERT(sh->txpp.tick);
			priv->txpp_en = 1;
			sh->txpp.refcnt = 1;
		} else {
			rte_errno = -err;
		}
	}
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	return err;
}

/**
 * Stops and destroys packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_txpp_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (!priv->txpp_en) {
		/* Packet pacing is already disabled for the device. */
		return;
	}
	priv->txpp_en = 0;
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	MLX5_ASSERT(sh->txpp.refcnt);
	if (!sh->txpp.refcnt || --sh->txpp.refcnt) {
		/* Still referenced, just release the mutex. */
		ret = pthread_mutex_unlock(&sh->txpp.mutex);
		MLX5_ASSERT(!ret);
		RTE_SET_USED(ret);
		return;
	}
	/* No references any more, do actual destroy. */
	mlx5_txpp_destroy(sh);
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
}

/*
 * Read the current clock counter of an Ethernet device.
 *
 * This returns the current raw clock value of an Ethernet device. It is
 * a raw amount of ticks, with no given time reference.
 * The value returned here is from the same clock as the one
 * filling the timestamp field of Rx/Tx packets when using hardware
 * timestamp offload. Therefore it can be used to compute a precise
 * conversion of the device clock to the real time.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param timestamp
 *   Pointer to the uint64_t that holds the raw clock value.
 *
 * @return
 *   - 0: Success.
 *   - -ENOTSUP: The function is not supported in this mode. Requires
 *     packet pacing module configured and started (tx_pp devarg)
 */
int
mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (sh->txpp.refcnt) {
		struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
		struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
		union {
			rte_int128_t u128;
			struct mlx5_cqe_ts cts;
		} to;
		uint64_t ts;

		mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp,
				     &to.u128);
		if (to.cts.op_own >> 4) {
			DRV_LOG(DEBUG, "Clock Queue error sync lost.");
			rte_atomic32_inc(&sh->txpp.err_clock_queue);
			sh->txpp.sync_lost = 1;
			return -EIO;
		}
		ts = rte_be_to_cpu_64(to.cts.timestamp);
		ts = mlx5_txpp_convert_rx_ts(sh, ts);
		*timestamp = ts;
		return 0;
	}
	/* Not supported in isolated mode - kernel does not see the CQEs. */
	if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -ENOTSUP;
	ret = mlx5_read_clock(dev, timestamp);
	return ret;
}

/**
 * DPDK callback to clear device extended statistics.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success and stats is reset, negative errno value otherwise and
 *   rte_errno is set.
 */
int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	rte_atomic32_set(&sh->txpp.err_miss_int, 0);
	rte_atomic32_set(&sh->txpp.err_rearm_queue, 0);
	rte_atomic32_set(&sh->txpp.err_clock_queue, 0);
	rte_atomic32_set(&sh->txpp.err_ts_past, 0);
	rte_atomic32_set(&sh->txpp.err_ts_future, 0);
	return 0;
}

/**
 * Routine to retrieve names of extended device statistics
 * for packet send scheduling. It appends the specific stats names
 * after the parts filled by preceding modules (eth stats, etc.).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] xstats_names
 *   Buffer to insert names into.
 * @param n
 *   Number of names.
 * @param n_used
 *   Number of names filled by preceding statistics modules.
 *
 * @return
 *   Number of xstats names.
1180 */ 1181 int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, 1182 struct rte_eth_xstat_name *xstats_names, 1183 unsigned int n, unsigned int n_used) 1184 { 1185 unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names); 1186 unsigned int i; 1187 1188 if (n >= n_used + n_txpp && xstats_names) { 1189 for (i = 0; i < n_txpp; ++i) { 1190 strncpy(xstats_names[i + n_used].name, 1191 mlx5_txpp_stat_names[i], 1192 RTE_ETH_XSTATS_NAME_SIZE); 1193 xstats_names[i + n_used].name 1194 [RTE_ETH_XSTATS_NAME_SIZE - 1] = 0; 1195 } 1196 } 1197 return n_used + n_txpp; 1198 } 1199 1200 static inline void 1201 mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp, 1202 struct mlx5_txpp_ts *tsa, uint16_t idx) 1203 { 1204 do { 1205 int64_t ts, ci; 1206 1207 ts = rte_atomic64_read(&txpp->tsa[idx].ts); 1208 ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts); 1209 rte_compiler_barrier(); 1210 if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) 1211 continue; 1212 if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts) 1213 continue; 1214 if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci) 1215 continue; 1216 rte_atomic64_set(&tsa->ts, ts); 1217 rte_atomic64_set(&tsa->ci_ts, ci); 1218 return; 1219 } while (true); 1220 } 1221 1222 /* 1223 * Jitter reflects the clock change between 1224 * neighbours Clock Queue completions. 1225 */ 1226 static uint64_t 1227 mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp) 1228 { 1229 struct mlx5_txpp_ts tsa0, tsa1; 1230 int64_t dts, dci; 1231 uint16_t ts_p; 1232 1233 if (txpp->ts_n < 2) { 1234 /* No gathered enough reports yet. */ 1235 return 0; 1236 } 1237 do { 1238 int ts_0, ts_1; 1239 1240 ts_p = txpp->ts_p; 1241 rte_compiler_barrier(); 1242 ts_0 = ts_p - 2; 1243 if (ts_0 < 0) 1244 ts_0 += MLX5_TXPP_REARM_SQ_SIZE; 1245 ts_1 = ts_p - 1; 1246 if (ts_1 < 0) 1247 ts_1 += MLX5_TXPP_REARM_SQ_SIZE; 1248 mlx5_txpp_read_tsa(txpp, &tsa0, ts_0); 1249 mlx5_txpp_read_tsa(txpp, &tsa1, ts_1); 1250 rte_compiler_barrier(); 1251 } while (ts_p != txpp->ts_p); 1252 /* We have two neighbor reports, calculate the jitter. */ 1253 dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); 1254 dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - 1255 (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); 1256 if (dci < 0) 1257 dci += 1 << MLX5_CQ_INDEX_WIDTH; 1258 dci *= txpp->tick; 1259 return (dts > dci) ? dts - dci : dci - dts; 1260 } 1261 1262 /* 1263 * Wander reflects the long-term clock change 1264 * over the entire length of all Clock Queue completions. 1265 */ 1266 static uint64_t 1267 mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp) 1268 { 1269 struct mlx5_txpp_ts tsa0, tsa1; 1270 int64_t dts, dci; 1271 uint16_t ts_p; 1272 1273 if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) { 1274 /* No gathered enough reports yet. */ 1275 return 0; 1276 } 1277 do { 1278 int ts_0, ts_1; 1279 1280 ts_p = txpp->ts_p; 1281 rte_compiler_barrier(); 1282 ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1; 1283 if (ts_0 < 0) 1284 ts_0 += MLX5_TXPP_REARM_SQ_SIZE; 1285 ts_1 = ts_p - 1; 1286 if (ts_1 < 0) 1287 ts_1 += MLX5_TXPP_REARM_SQ_SIZE; 1288 mlx5_txpp_read_tsa(txpp, &tsa0, ts_0); 1289 mlx5_txpp_read_tsa(txpp, &tsa1, ts_1); 1290 rte_compiler_barrier(); 1291 } while (ts_p != txpp->ts_p); 1292 /* We have two neighbor reports, calculate the jitter. 
	dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts);
	dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH));
	dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}

/**
 * Routine to retrieve extended device statistics
 * for packet send scheduling. It appends the specific statistics
 * after the parts filled by preceding modules (eth stats, etc.).
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] stats
 *   Pointer to rte extended stats table.
 * @param n
 *   The size of the stats table.
 * @param n_used
 *   Number of stats filled by preceding statistics modules.
 *
 * @return
 *   Number of extended stats on success and stats is filled,
 *   negative on error and rte_errno is set.
 */
int
mlx5_txpp_xstats_get(struct rte_eth_dev *dev,
		     struct rte_eth_xstat *stats,
		     unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);

	if (n >= n_used + n_txpp && stats) {
		struct mlx5_priv *priv = dev->data->dev_private;
		struct mlx5_dev_ctx_shared *sh = priv->sh;
		unsigned int i;

		for (i = 0; i < n_txpp; ++i)
			stats[n_used + i].id = n_used + i;
		stats[n_used + 0].value =
			rte_atomic32_read(&sh->txpp.err_miss_int);
		stats[n_used + 1].value =
			rte_atomic32_read(&sh->txpp.err_rearm_queue);
		stats[n_used + 2].value =
			rte_atomic32_read(&sh->txpp.err_clock_queue);
		stats[n_used + 3].value =
			rte_atomic32_read(&sh->txpp.err_ts_past);
		stats[n_used + 4].value =
			rte_atomic32_read(&sh->txpp.err_ts_future);
		stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp);
		stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp);
		stats[n_used + 7].value = sh->txpp.sync_lost;
	}
	return n_used + n_txpp;
}