/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <fcntl.h>
#include <stdint.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_common_os.h"

static const char * const mlx5_txpp_stat_names[] = {
	"txpp_err_miss_int", /* Missed service interrupt. */
	"txpp_err_rearm_queue", /* Rearm Queue errors. */
	"txpp_err_clock_queue", /* Clock Queue errors. */
	"txpp_err_ts_past", /* Timestamp in the past. */
	"txpp_err_ts_future", /* Timestamp in the distant future. */
	"txpp_jitter", /* Timestamp jitter (one Clock Queue completion). */
	"txpp_wander", /* Timestamp jitter (half of Clock Queue completions). */
	"txpp_sync_lost", /* Scheduling synchronization lost. */
};

/* Destroy Event Queue Notification Channel. */
static void
mlx5_txpp_destroy_eqn(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.echan) {
		mlx5_glue->devx_destroy_event_channel(sh->txpp.echan);
		sh->txpp.echan = NULL;
	}
	sh->txpp.eqn = 0;
}

/* Create Event Queue Notification Channel. */
static int
mlx5_txpp_create_eqn(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t lcore;

	MLX5_ASSERT(!sh->txpp.echan);
	lcore = (uint32_t)rte_lcore_to_cpu_id(-1);
	if (mlx5_glue->devx_query_eqn(sh->ctx, lcore, &sh->txpp.eqn)) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno);
		sh->txpp.eqn = 0;
		return -rte_errno;
	}
	sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx,
			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!sh->txpp.echan) {
		sh->txpp.eqn = 0;
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create event channel %d.",
			rte_errno);
		return -rte_errno;
	}
	return 0;
}

static void
mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.pp) {
		mlx5_glue->dv_free_pp(sh->txpp.pp);
		sh->txpp.pp = NULL;
		sh->txpp.pp_id = 0;
	}
}

/* Allocate Packet Pacing index from kernel via mlx5dv call. */
static int
mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh)
{
#ifdef HAVE_MLX5DV_PP_ALLOC
	uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)];
	uint64_t rate;

	MLX5_ASSERT(!sh->txpp.pp);
	memset(&pp, 0, sizeof(pp));
	rate = NS_PER_S / sh->txpp.tick;
	if (rate * sh->txpp.tick != NS_PER_S)
		DRV_LOG(WARNING, "Packet pacing frequency is not precise.");
	if (sh->txpp.test) {
		uint32_t len;

		len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE,
			      (size_t)RTE_ETHER_MIN_LEN);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 burst_upper_bound, len);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 typical_packet_size, len);
		/* Convert packets per second into kilobits. */
		rate = (rate * len) / (1000ul / CHAR_BIT);
		DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate);
	}
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate);
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
		 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE);
	sh->txpp.pp = mlx5_glue->dv_alloc_pp
				(sh->ctx, sizeof(pp), &pp,
				 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX);
	if (sh->txpp.pp == NULL) {
		DRV_LOG(ERR, "Failed to allocate packet pacing index.");
		rte_errno = errno;
		return -errno;
	}
	if (!sh->txpp.pp->index) {
		DRV_LOG(ERR, "Zero packet pacing index allocated.");
		mlx5_txpp_free_pp_index(sh);
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	sh->txpp.pp_id = sh->txpp.pp->index;
	return 0;
#else
	RTE_SET_USED(sh);
	DRV_LOG(ERR, "Allocating pacing index is not supported.");
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}

static void
mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq)
{
	if (wq->sq)
		claim_zero(mlx5_devx_cmd_destroy(wq->sq));
	if (wq->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem));
	if (wq->sq_buf)
		mlx5_free((void *)(uintptr_t)wq->sq_buf);
	if (wq->cq)
		claim_zero(mlx5_devx_cmd_destroy(wq->cq));
	if (wq->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem));
	if (wq->cq_buf)
		mlx5_free((void *)(uintptr_t)wq->cq_buf);
	memset(wq, 0, sizeof(*wq));
}

static void
mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;

	mlx5_txpp_destroy_send_queue(wq);
}

static void
mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;

	mlx5_txpp_destroy_send_queue(wq);
	if (sh->txpp.tsa) {
		mlx5_free(sh->txpp.tsa);
		sh->txpp.tsa = NULL;
	}
}

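/*
 * Ring the Rearm Queue doorbell. The 64-bit doorbell value is built from
 * the control segment of the WQE being posted; the SQ doorbell record is
 * updated first and then the UAR register is written, with memory barriers
 * preserving the ordering required by the hardware.
 */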
static void
mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	union {
		uint32_t w32[2];
		uint64_t w64;
	} cs;

	wq->sq_ci = ci + 1;
	cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
		   (wq->wqes[ci & (wq->sq_size - 1)].ctrl[0]) | (ci - 1) << 8);
	cs.w32[1] = wq->wqes[ci & (wq->sq_size - 1)].ctrl[1];
	/* Update SQ doorbell record with new SQ ci. */
	rte_compiler_barrier();
	*wq->sq_dbrec = rte_cpu_to_be_32(wq->sq_ci);
	/* Make sure the doorbell record is updated. */
	rte_wmb();
	/* Write to doorbell register to start processing. */
	__mlx5_uar_write64_relaxed(cs.w64, sh->tx_uar->reg_addr, NULL);
	rte_wmb();
}

static void
mlx5_txpp_fill_cqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	uint32_t i;

	for (i = 0; i < MLX5_TXPP_REARM_CQ_SIZE; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
}

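/*
 * The Rearm Queue is filled with pairs of WQEs: a SEND_EN that enables the
 * next portion of Clock Queue WQEs and a WAIT that blocks until the Clock
 * Queue CQE counter reaches the corresponding index, so the Rearm Queue
 * keeps pace with the Clock Queue completions.
 */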
static void
mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	uint32_t i;

	for (i = 0; i < wq->sq_size; i += 2) {
		struct mlx5_wqe_cseg *cs;
		struct mlx5_wqe_qseg *qs;
		uint32_t index;

		/* Build SEND_EN request with slave WQE index. */
		cs = &wqe[i + 0].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ALWAYS <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) &
			((1 << MLX5_WQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.sq->id);
		/* Build WAIT request with slave CQE index. */
		cs = &wqe[i + 1].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) &
			((1 << MLX5_CQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.cq->id);
	}
}

/* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */
static int
mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	size_t page_size = sysconf(_SC_PAGESIZE);
	uint32_t umem_size, umem_dbrec;
	int ret;

	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_REARM_CQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create completion queue object for Rearm Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			    MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = sh->tx_uar->page_id;
	cq_attr.eqn = sh->txpp.eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_REARM_CQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Rearm Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	wq->arm_sn = 0;
	/* Mark all CQEs initially as invalid. */
	mlx5_txpp_fill_cqe_rearm_queue(sh);
	/*
	 * Allocate memory buffer for Send Queue WQEs.
	 * There should be no WQE leftovers in the cyclic queue.
	 */
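	/*
	 * Like the CQ buffer above, the WQE ring and its doorbell record
	 * share a single allocation: the record is placed at the
	 * MLX5_DBR_SIZE aligned offset right after the ring and both are
	 * registered as one umem.
	 */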
	wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE;
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create send queue object for Rearm Queue. */
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.cqn = wq->cq->id;
	sq_attr.cd_master = 1;
	sq_attr.wq_attr.uar_page = sh->tx_uar->page_id;
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Rearm Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before going to Ready state. */
	mlx5_txpp_fill_wqe_rearm_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Rearm Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_rearm_queue(sh);
	rte_errno = -ret;
	return ret;
}

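/*
 * Fill the Clock Queue WQE ring. In normal scheduling mode the WQEs are
 * plain NOPs, in the pacing test mode every WQE carries an inline SEND
 * with a synthetic Ethernet/IPv4/UDP test packet; the first WQE pattern
 * is replicated over the whole cyclic queue.
 */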
static void
mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	struct mlx5_wqe_cseg *cs = &wqe->cseg;
	uint32_t wqe_size, opcode, i;
	uint8_t *dst;

	/* For test purposes fill the WQ with SEND inline packet. */
	if (sh->txpp.test) {
		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
				     MLX5_WQE_CSEG_SIZE +
				     2 * MLX5_WQE_ESEG_SIZE -
				     MLX5_ESEG_MIN_INLINE_SIZE,
				     MLX5_WSEG_SIZE);
		opcode = MLX5_OPCODE_SEND;
	} else {
		wqe_size = MLX5_WSEG_SIZE;
		opcode = MLX5_OPCODE_NOP;
	}
	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
	cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) |
				     (wqe_size / MLX5_WSEG_SIZE));
	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE);
	if (sh->txpp.test) {
		struct mlx5_wqe_eseg *es = &wqe->eseg;
		struct rte_ether_hdr *eth_hdr;
		struct rte_ipv4_hdr *ip_hdr;
		struct rte_udp_hdr *udp_hdr;

		/* Build the inline test packet pattern. */
		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
			    (sizeof(struct rte_ether_hdr) +
			     sizeof(struct rte_ipv4_hdr)));
		es->flags = 0;
		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		es->swp_offs = 0;
		es->metadata = 0;
		es->swp_flags = 0;
		es->mss = 0;
		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
		/* Build test packet L2 header (Ethernet). */
		dst = (uint8_t *)&es->inline_data;
		eth_hdr = (struct rte_ether_hdr *)dst;
		rte_eth_random_addr(&eth_hdr->d_addr.addr_bytes[0]);
		rte_eth_random_addr(&eth_hdr->s_addr.addr_bytes[0]);
		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		/* Build test packet L3 header (IP v4). */
		dst += sizeof(struct rte_ether_hdr);
		ip_hdr = (struct rte_ipv4_hdr *)dst;
		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
		ip_hdr->type_of_service = 0;
		ip_hdr->fragment_offset = 0;
		ip_hdr->time_to_live = 64;
		ip_hdr->next_proto_id = IPPROTO_UDP;
		ip_hdr->packet_id = 0;
		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
						sizeof(struct rte_ether_hdr));
		/* use RFC5735 / RFC2544 reserved network test addresses */
		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 1);
		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 2);
		if (MLX5_TXPP_TEST_PKT_SIZE <
		    (sizeof(struct rte_ether_hdr) +
		     sizeof(struct rte_ipv4_hdr) +
		     sizeof(struct rte_udp_hdr)))
			goto wcopy;
		/* Build test packet L4 header (UDP). */
		dst += sizeof(struct rte_ipv4_hdr);
		udp_hdr = (struct rte_udp_hdr *)dst;
		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
		udp_hdr->dst_port = RTE_BE16(9);
		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
					      sizeof(struct rte_ether_hdr) -
					      sizeof(struct rte_ipv4_hdr));
		udp_hdr->dgram_cksum = 0;
		/* Fill the test packet data. */
		dst += sizeof(struct rte_udp_hdr);
		for (i = sizeof(struct rte_ether_hdr) +
			sizeof(struct rte_ipv4_hdr) +
			sizeof(struct rte_udp_hdr);
				i < MLX5_TXPP_TEST_PKT_SIZE; i++)
			*dst++ = (uint8_t)(i & 0xFF);
	}
wcopy:
	/* Duplicate the pattern to the next WQEs. */
	dst = (uint8_t *)(uintptr_t)wq->sq_buf;
	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
		dst += wqe_size;
		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size);
	}
}

/* Creates the Clock Queue for packet pacing, returns zero on success. */
static int
mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	size_t page_size = sysconf(_SC_PAGESIZE);
	uint32_t umem_size, umem_dbrec;
	int ret;

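	/*
	 * The timestamp array keeps the history of the recent Clock Queue
	 * completions and is used to calculate the jitter and wander
	 * extended statistics.
	 */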
	sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				   MLX5_TXPP_REARM_SQ_SIZE *
				   sizeof(struct mlx5_txpp_ts),
				   0, sh->numa_node);
	if (!sh->txpp.tsa) {
		DRV_LOG(ERR, "Failed to allocate memory for CQ stats.");
		return -ENOMEM;
	}
	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create completion queue object for Clock Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			    MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.use_first_only = 1;
	cq_attr.overrun_ignore = 1;
	cq_attr.uar_page_id = sh->tx_uar->page_id;
	cq_attr.eqn = sh->txpp.eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = wq->cq_umem->umem_id;
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = wq->cq_umem->umem_id;
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	/* Allocate memory buffer for Send Queue WQEs. */
	if (sh->txpp.test) {
		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
					MLX5_WQE_CSEG_SIZE +
					2 * MLX5_WQE_ESEG_SIZE -
					MLX5_ESEG_MIN_INLINE_SIZE,
					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
	} else {
		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
	}
	/* There should not be WQE leftovers in the cyclic queue. */
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create send queue object for Clock Queue. */
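	/*
	 * In normal scheduling mode the Clock Queue does not transmit to
	 * the wire (non_wire is set) and its pre-built WQE ring is declared
	 * static (static_sq_wq) since the WQEs are never rewritten; in test
	 * mode real packets are sent so the pacing rate can be verified.
	 */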
	if (sh->txpp.test) {
		sq_attr.tis_lst_sz = 1;
		sq_attr.tis_num = sh->tis->id;
		sq_attr.non_wire = 0;
		sq_attr.static_sq_wq = 1;
	} else {
		sq_attr.non_wire = 1;
		sq_attr.static_sq_wq = 1;
	}
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = wq->cq->id;
	sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
	sq_attr.wq_attr.cd_slave = 1;
	sq_attr.wq_attr.uar_page = sh->tx_uar->page_id;
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = wq->sq_umem->umem_id;
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = wq->sq_umem->umem_id;
	/* umem_offset must be zero for static_sq_wq queue. */
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before going to Ready state. */
	mlx5_txpp_fill_wqe_clock_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	wq->sq_ci = 0;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Clock Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_clock_queue(sh);
	rte_errno = -ret;
	return ret;
}

/* Enable notification from the Rearm Queue CQ. */
static inline void
mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
	uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
	uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
	uint64_t db_be = rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq->id);
	uint32_t *addr = RTE_PTR_ADD(sh->tx_uar->base_addr, MLX5_CQ_DOORBELL);

	rte_compiler_barrier();
	aq->cq_dbrec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi);
	rte_wmb();
#ifdef RTE_ARCH_64
	*(uint64_t *)addr = db_be;
#else
	*(uint32_t *)addr = db_be;
	rte_io_wmb();
	*((uint32_t *)addr + 1) = db_be >> 32;
#endif
	aq->arm_sn++;
}

static inline void
mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts)
{
	/*
	 * The only CQE of Clock Queue is being continuously
	 * updated by hardware with the specified rate. We have to
	 * read timestamp and WQE completion index atomically.
	 */
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
	rte_int128_t src;

	memset(&src, 0, sizeof(src));
	*ts = src;
	/* if (*from == *ts) *from = src else *ts = *from; */
	rte_atomic128_cmp_exchange(from, ts, &src, 0,
				   __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#else
	rte_atomic64_t *cqe = (rte_atomic64_t *)from;

	/* Power architecture does not support 16B compare-and-swap. */
	for (;;) {
		int64_t tm, op;
		int64_t *ps;

		rte_compiler_barrier();
		tm = rte_atomic64_read(cqe + 0);
		op = rte_atomic64_read(cqe + 1);
		rte_compiler_barrier();
		if (tm != rte_atomic64_read(cqe + 0))
			continue;
		if (op != rte_atomic64_read(cqe + 1))
			continue;
		ps = (int64_t *)ts;
		ps[0] = tm;
		ps[1] = op;
		return;
	}
#endif
}

/* Stores timestamp in the cache structure to share data with datapath. */
static inline void
mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh,
			  uint64_t ts, uint64_t ci)
{
	ci = ci << (64 - MLX5_CQ_INDEX_WIDTH);
	ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH;
	rte_compiler_barrier();
	rte_atomic64_set(&sh->txpp.ts.ts, ts);
	rte_atomic64_set(&sh->txpp.ts.ci_ts, ci);
	rte_wmb();
}

/* Reads timestamp from Clock Queue CQE and stores in the cache. */
static inline void
mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	union {
		rte_int128_t u128;
		struct mlx5_cqe_ts cts;
	} to;
	uint64_t ts;
	uint16_t ci;

	static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t),
		      "Wrong timestamp CQE part size");
	mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
	if (to.cts.op_own >> 4) {
		DRV_LOG(DEBUG, "Clock Queue error sync lost.");
		rte_atomic32_inc(&sh->txpp.err_clock_queue);
		sh->txpp.sync_lost = 1;
		return;
	}
	ci = rte_be_to_cpu_16(to.cts.wqe_counter);
	ts = rte_be_to_cpu_64(to.cts.timestamp);
	ts = mlx5_txpp_convert_rx_ts(sh, ts);
	wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX;
	wq->sq_ci = ci;
	mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci);
}

/* Waits for the first completion on Clock Queue to init timestamp. */
static inline void
mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	uint32_t wait;

	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) {
		struct timespec onems;

		mlx5_txpp_update_timestamp(sh);
		if (wq->sq_ci)
			return;
		/* Wait one millisecond and try again. */
		onems.tv_sec = 0;
		onems.tv_nsec = NS_PER_S / MS_PER_S;
		nanosleep(&onems, 0);
	}
	DRV_LOG(ERR, "Unable to initialize timestamp.");
	sh->txpp.sync_lost = 1;
}

#ifdef HAVE_IBV_DEVX_EVENT
/* Gather statistics for timestamp from Clock Queue CQE. */
static inline void
mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	/* Check whether we have a valid timestamp. */
	if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n)
		return;
	MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE);
	sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts;
	if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE)
		sh->txpp.ts_p = 0;
	if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE)
		++sh->txpp.ts_n;
}

/* Handles Rearm Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	uint32_t cq_ci = wq->cq_ci;
	bool error = false;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &wq->cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)];
		ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci);
		switch (ret) {
		case MLX5_CQE_STATUS_ERR:
			error = true;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_SW_OWN:
			wq->sq_ci += 2;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_HW_OWN:
			break;
		default:
			MLX5_ASSERT(false);
			break;
		}
	} while (ret != MLX5_CQE_STATUS_HW_OWN);
	if (likely(cq_ci != wq->cq_ci)) {
		/* Check whether we have missed interrupts. */
		if (cq_ci - wq->cq_ci != 1) {
			DRV_LOG(DEBUG, "Rearm Queue missed interrupt.");
			rte_atomic32_inc(&sh->txpp.err_miss_int);
			/* Check sync lost on wqe index. */
			if (cq_ci - wq->cq_ci >=
			    (((1UL << MLX5_WQ_INDEX_WIDTH) /
			      MLX5_TXPP_REARM) - 1))
				error = true;
		}
		/* Update doorbell record to notify hardware. */
		rte_compiler_barrier();
		*wq->cq_dbrec = rte_cpu_to_be_32(cq_ci);
		rte_wmb();
		wq->cq_ci = cq_ci;
		/* Fire new requests to Rearm Queue. */
		if (error) {
			DRV_LOG(DEBUG, "Rearm Queue error sync lost.");
			rte_atomic32_inc(&sh->txpp.err_rearm_queue);
			sh->txpp.sync_lost = 1;
		}
	}
}

/* Handles Clock Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_update_timestamp(sh);
	mlx5_txpp_gather_timestamp(sh);
}
#endif

/* Invoked periodically on Rearm Queue completions. */
void
mlx5_txpp_interrupt_handler(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_EVENT
	RTE_SET_USED(cb_arg);
	return;
#else
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Process events in the loop. Only rearm completions are expected. */
	while (mlx5_glue->devx_get_event
				(sh->txpp.echan,
				 &out.event_resp,
				 sizeof(out.buf)) >=
				 (ssize_t)sizeof(out.event_resp.cookie)) {
		mlx5_txpp_handle_rearm_queue(sh);
		mlx5_txpp_handle_clock_queue(sh);
		mlx5_txpp_cq_arm(sh);
		mlx5_txpp_doorbell_rearm_queue
					(sh, sh->txpp.rearm_queue.sq_ci - 1);
	}
#endif /* HAVE_IBV_DEVX_EVENT */
}

static void
mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->txpp.intr_handle.fd)
		return;
	mlx5_intr_callback_unregister(&sh->txpp.intr_handle,
				      mlx5_txpp_interrupt_handler, sh);
	sh->txpp.intr_handle.fd = 0;
}

/* Attach interrupt handler and fire the first request to Rearm Queue. */
static int
mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh)
{
	uint16_t event_nums[1] = {0};
	int flags;
	int ret;

	rte_atomic32_set(&sh->txpp.err_miss_int, 0);
	rte_atomic32_set(&sh->txpp.err_rearm_queue, 0);
	rte_atomic32_set(&sh->txpp.err_clock_queue, 0);
	rte_atomic32_set(&sh->txpp.err_ts_past, 0);
	rte_atomic32_set(&sh->txpp.err_ts_future, 0);
	/* Attach interrupt handler to process Rearm Queue completions. */
	flags = fcntl(sh->txpp.echan->fd, F_GETFL);
	ret = fcntl(sh->txpp.echan->fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(ERR, "Failed to change event channel FD.");
		rte_errno = errno;
		return -rte_errno;
	}
	memset(&sh->txpp.intr_handle, 0, sizeof(sh->txpp.intr_handle));
	sh->txpp.intr_handle.fd = sh->txpp.echan->fd;
	sh->txpp.intr_handle.type = RTE_INTR_HANDLE_EXT;
	if (rte_intr_callback_register(&sh->txpp.intr_handle,
				       mlx5_txpp_interrupt_handler, sh)) {
		sh->txpp.intr_handle.fd = 0;
		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
		return -rte_errno;
	}
	/* Subscribe CQ event to the event channel controlled by the driver. */
	ret = mlx5_glue->devx_subscribe_devx_event(sh->txpp.echan,
						   sh->txpp.rearm_queue.cq->obj,
						   sizeof(event_nums),
						   event_nums, 0);
	if (ret) {
		DRV_LOG(ERR, "Failed to subscribe CQE event.");
		rte_errno = errno;
		return -errno;
	}
	/* Enable interrupts in the CQ. */
	mlx5_txpp_cq_arm(sh);
	/* Fire the first request on Rearm Queue. */
	mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1);
	mlx5_txpp_init_timestamp(sh);
	return 0;
}

/*
 * The routine initializes the packet pacing infrastructure:
 * - allocates PP context
 * - Clock CQ/SQ
 * - Rearm CQ/SQ
 * - attaches rearm interrupt handler
 * - starts Clock Queue
 *
 * Returns 0 on success, negative otherwise
 */
static int
mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv)
{
	int tx_pp = priv->config.tx_pp;
	int ret;

	/* Store the requested pacing parameters. */
	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
	sh->txpp.test = !!(tx_pp < 0);
	sh->txpp.skew = priv->config.tx_skew;
	sh->txpp.freq = priv->config.hca_attr.dev_freq_khz;
	ret = mlx5_txpp_create_eqn(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_alloc_pp_index(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_clock_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_rearm_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_start_service(sh);
	if (ret)
		goto exit;
exit:
	if (ret) {
		mlx5_txpp_stop_service(sh);
		mlx5_txpp_destroy_rearm_queue(sh);
		mlx5_txpp_destroy_clock_queue(sh);
		mlx5_txpp_free_pp_index(sh);
		mlx5_txpp_destroy_eqn(sh);
		sh->txpp.tick = 0;
		sh->txpp.test = 0;
		sh->txpp.skew = 0;
	}
	return ret;
}

/*
 * The routine destroys the packet pacing infrastructure:
 * - detaches rearm interrupt handler
 * - Rearm CQ/SQ
 * - Clock CQ/SQ
 * - PP context
 */
static void
mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_stop_service(sh);
	mlx5_txpp_destroy_rearm_queue(sh);
	mlx5_txpp_destroy_clock_queue(sh);
	mlx5_txpp_free_pp_index(sh);
	mlx5_txpp_destroy_eqn(sh);
	sh->txpp.tick = 0;
	sh->txpp.test = 0;
	sh->txpp.skew = 0;
}

/**
 * Creates and starts packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_txpp_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int err = 0;
	int ret;

	if (!priv->config.tx_pp) {
		/* Packet pacing is not requested for the device. */
		MLX5_ASSERT(priv->txpp_en == 0);
		return 0;
	}
	if (priv->txpp_en) {
		/* Packet pacing is already enabled for the device. */
		MLX5_ASSERT(sh->txpp.refcnt);
		return 0;
	}
	if (priv->config.tx_pp > 0) {
		ret = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
		if (ret < 0)
			return 0;
	}
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	if (sh->txpp.refcnt) {
		priv->txpp_en = 1;
		++sh->txpp.refcnt;
	} else {
		err = mlx5_txpp_create(sh, priv);
		if (!err) {
			MLX5_ASSERT(sh->txpp.tick);
			priv->txpp_en = 1;
			sh->txpp.refcnt = 1;
		} else {
			rte_errno = -err;
		}
	}
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	return err;
}

/**
 * Stops and destroys packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_txpp_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (!priv->txpp_en) {
		/* Packet pacing is already disabled for the device. */
		return;
	}
	priv->txpp_en = 0;
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	MLX5_ASSERT(sh->txpp.refcnt);
	if (!sh->txpp.refcnt || --sh->txpp.refcnt) {
		/* Still referenced by other ports, just release the lock. */
		ret = pthread_mutex_unlock(&sh->txpp.mutex);
		MLX5_ASSERT(!ret);
		RTE_SET_USED(ret);
		return;
	}
	/* No references any more, do actual destroy. */
	mlx5_txpp_destroy(sh);
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
}

/*
 * Read the current clock counter of an Ethernet device.
 *
 * This returns the current raw clock value of an Ethernet device. It is
 * a raw amount of ticks, with no given time reference.
 * The value returned here is from the same clock as the one
 * filling timestamp field of Rx/Tx packets when using hardware timestamp
 * offload. Therefore it can be used to compute a precise conversion of
 * the device clock to the real time.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param timestamp
 *   Pointer to the uint64_t that holds the raw clock value.
 *
 * @return
 *   - 0: Success.
 *   - -ENOTSUP: The function is not supported in this mode. Requires
 *     packet pacing module configured and started (tx_pp devarg)
 */
int
mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (sh->txpp.refcnt) {
		struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
		struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
		union {
			rte_int128_t u128;
			struct mlx5_cqe_ts cts;
		} to;
		uint64_t ts;

		mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
		if (to.cts.op_own >> 4) {
			DRV_LOG(DEBUG, "Clock Queue error sync lost.");
			rte_atomic32_inc(&sh->txpp.err_clock_queue);
			sh->txpp.sync_lost = 1;
			return -EIO;
		}
		ts = rte_be_to_cpu_64(to.cts.timestamp);
		ts = mlx5_txpp_convert_rx_ts(sh, ts);
		*timestamp = ts;
		return 0;
	}
	/* Not supported in isolated mode - kernel does not see the CQEs. */
	if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -ENOTSUP;
	ret = mlx5_read_clock(dev, timestamp);
	return ret;
}

/**
 * DPDK callback to clear device extended statistics.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success and stats is reset, negative errno value otherwise and
 *   rte_errno is set.
 */
int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	rte_atomic32_set(&sh->txpp.err_miss_int, 0);
	rte_atomic32_set(&sh->txpp.err_rearm_queue, 0);
	rte_atomic32_set(&sh->txpp.err_clock_queue, 0);
	rte_atomic32_set(&sh->txpp.err_ts_past, 0);
	rte_atomic32_set(&sh->txpp.err_ts_future, 0);
	return 0;
}

/**
 * Routine to retrieve names of extended device statistics
 * for packet send scheduling. It appends the specific stats names
 * after the parts filled by preceding modules (eth stats, etc.)
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] xstats_names
 *   Buffer to insert names into.
 * @param n
 *   Number of names.
 * @param n_used
 *   Number of names filled by preceding statistics modules.
 *
 * @return
 *   Number of xstats names.
 */
int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			       struct rte_eth_xstat_name *xstats_names,
			       unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);
	unsigned int i;

	if (n >= n_used + n_txpp && xstats_names) {
		for (i = 0; i < n_txpp; ++i) {
			strncpy(xstats_names[i + n_used].name,
				mlx5_txpp_stat_names[i],
				RTE_ETH_XSTATS_NAME_SIZE);
			xstats_names[i + n_used].name
					[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
		}
	}
	return n_used + n_txpp;
}

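/*
 * Read one timestamp array entry as a consistent pair: re-read both 64-bit
 * parts until neither changes and the timestamp bits replicated in ci_ts
 * match the ts value.
 */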
static inline void
mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp,
		   struct mlx5_txpp_ts *tsa, uint16_t idx)
{
	do {
		int64_t ts, ci;

		ts = rte_atomic64_read(&txpp->tsa[idx].ts);
		ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts);
		rte_compiler_barrier();
		if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0)
			continue;
		if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts)
			continue;
		if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci)
			continue;
		rte_atomic64_set(&tsa->ts, ts);
		rte_atomic64_set(&tsa->ci_ts, ci);
		return;
	} while (true);
}

/*
 * Jitter reflects the clock change between
 * neighboring Clock Queue completions.
 */
static uint64_t
mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp)
{
	struct mlx5_txpp_ts tsa0, tsa1;
	int64_t dts, dci;
	uint16_t ts_p;

	if (txpp->ts_n < 2) {
		/* Not gathered enough reports yet. */
		return 0;
	}
	do {
		int ts_0, ts_1;

		ts_p = txpp->ts_p;
		rte_compiler_barrier();
		ts_0 = ts_p - 2;
		if (ts_0 < 0)
			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
		ts_1 = ts_p - 1;
		if (ts_1 < 0)
			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
		rte_compiler_barrier();
	} while (ts_p != txpp->ts_p);
	/* We have two neighbor reports, calculate the jitter. */
	dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts);
	dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH));
	if (dci < 0)
		dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}

/*
 * Wander reflects the long-term clock change
 * over the entire length of all Clock Queue completions.
 */
static uint64_t
mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp)
{
	struct mlx5_txpp_ts tsa0, tsa1;
	int64_t dts, dci;
	uint16_t ts_p;

	if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) {
		/* Not gathered enough reports yet. */
		return 0;
	}
	do {
		int ts_0, ts_1;

		ts_p = txpp->ts_p;
		rte_compiler_barrier();
		ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1;
		if (ts_0 < 0)
			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
		ts_1 = ts_p - 1;
		if (ts_1 < 0)
			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
		rte_compiler_barrier();
	} while (ts_p != txpp->ts_p);
	/* We have two distant reports, calculate the wander. */
	dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts);
	dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH));
	dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}

/**
 * Routine to retrieve extended device statistics
 * for packet send scheduling. It appends the specific statistics
 * after the parts filled by preceding modules (eth stats, etc.)
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] stats
 *   Pointer to rte extended stats table.
 * @param n
 *   The size of the stats table.
 * @param n_used
 *   Number of stats filled by preceding statistics modules.
 *
 * @return
 *   Number of extended stats on success and stats is filled,
 *   negative on error and rte_errno is set.
 */
int
mlx5_txpp_xstats_get(struct rte_eth_dev *dev,
		     struct rte_eth_xstat *stats,
		     unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);

	if (n >= n_used + n_txpp && stats) {
		struct mlx5_priv *priv = dev->data->dev_private;
		struct mlx5_dev_ctx_shared *sh = priv->sh;
		unsigned int i;

		for (i = 0; i < n_txpp; ++i)
			stats[n_used + i].id = n_used + i;
		stats[n_used + 0].value =
			rte_atomic32_read(&sh->txpp.err_miss_int);
		stats[n_used + 1].value =
			rte_atomic32_read(&sh->txpp.err_rearm_queue);
		stats[n_used + 2].value =
			rte_atomic32_read(&sh->txpp.err_clock_queue);
		stats[n_used + 3].value =
			rte_atomic32_read(&sh->txpp.err_ts_past);
		stats[n_used + 4].value =
			rte_atomic32_read(&sh->txpp.err_ts_future);
		stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp);
		stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp);
		stats[n_used + 7].value = sh->txpp.sync_lost;
	}
	return n_used + n_txpp;
}