/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <fcntl.h>
#include <stdint.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_common_os.h"

static const char * const mlx5_txpp_stat_names[] = {
	"txpp_err_miss_int", /* Missed service interrupt. */
	"txpp_err_rearm_queue", /* Rearm Queue errors. */
	"txpp_err_clock_queue", /* Clock Queue errors. */
	"txpp_err_ts_past", /* Timestamp in the past. */
	"txpp_err_ts_future", /* Timestamp in the distant future. */
	"txpp_jitter", /* Timestamp jitter (one Clock Queue completion). */
	"txpp_wander", /* Timestamp wander (half of Clock Queue completions). */
	"txpp_sync_lost", /* Scheduling synchronization lost. */
};

/* Destroy Event Queue Notification Channel. */
static void
mlx5_txpp_destroy_event_channel(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.echan) {
		mlx5_glue->devx_destroy_event_channel(sh->txpp.echan);
		sh->txpp.echan = NULL;
	}
}

/* Create Event Queue Notification Channel. */
static int
mlx5_txpp_create_event_channel(struct mlx5_dev_ctx_shared *sh)
{
	MLX5_ASSERT(!sh->txpp.echan);
	sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx,
			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!sh->txpp.echan) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create event channel %d.", rte_errno);
		return -rte_errno;
	}
	return 0;
}

static void
mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.pp) {
		mlx5_glue->dv_free_pp(sh->txpp.pp);
		sh->txpp.pp = NULL;
		sh->txpp.pp_id = 0;
	}
}

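/*
 * Note on the rate programmed below: in normal mode the Clock Queue sends
 * one WQE per scheduling tick, so the limit is expressed in WQEs per second
 * (MLX5_WQE_RATE) as NS_PER_S / tick. In test mode the same value is
 * converted to kilobits per second (MLX5_DATA_RATE) using the test packet
 * length, since in that mode real packets are emitted to the wire.
 */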
/* Allocate Packet Pacing index from kernel via mlx5dv call. */
static int
mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh)
{
#ifdef HAVE_MLX5DV_PP_ALLOC
	uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)];
	uint64_t rate;

	MLX5_ASSERT(!sh->txpp.pp);
	memset(&pp, 0, sizeof(pp));
	rate = NS_PER_S / sh->txpp.tick;
	if (rate * sh->txpp.tick != NS_PER_S)
		DRV_LOG(WARNING, "Packet pacing frequency is not precise.");
	if (sh->txpp.test) {
		uint32_t len;

		len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE,
			      (size_t)RTE_ETHER_MIN_LEN);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 burst_upper_bound, len);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 typical_packet_size, len);
		/* Convert packets per second into kilobits. */
		rate = (rate * len) / (1000ul / CHAR_BIT);
		DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate);
	}
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate);
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
		 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE);
	sh->txpp.pp = mlx5_glue->dv_alloc_pp
				(sh->ctx, sizeof(pp), &pp,
				 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX);
	if (sh->txpp.pp == NULL) {
		DRV_LOG(ERR, "Failed to allocate packet pacing index.");
		rte_errno = errno;
		return -errno;
	}
	if (!((struct mlx5dv_pp *)sh->txpp.pp)->index) {
		DRV_LOG(ERR, "Zero packet pacing index allocated.");
		mlx5_txpp_free_pp_index(sh);
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	sh->txpp.pp_id = ((struct mlx5dv_pp *)(sh->txpp.pp))->index;
	return 0;
#else
	RTE_SET_USED(sh);
	DRV_LOG(ERR, "Allocating pacing index is not supported.");
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}

static void
mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq)
{
	if (wq->sq)
		claim_zero(mlx5_devx_cmd_destroy(wq->sq));
	if (wq->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem));
	if (wq->sq_buf)
		mlx5_free((void *)(uintptr_t)wq->sq_buf);
	if (wq->cq)
		claim_zero(mlx5_devx_cmd_destroy(wq->cq));
	if (wq->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem));
	if (wq->cq_buf)
		mlx5_free((void *)(uintptr_t)wq->cq_buf);
	memset(wq, 0, sizeof(*wq));
}

static void
mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;

	mlx5_txpp_destroy_send_queue(wq);
}

static void
mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;

	mlx5_txpp_destroy_send_queue(wq);
	if (sh->txpp.tsa) {
		mlx5_free(sh->txpp.tsa);
		sh->txpp.tsa = NULL;
	}
}

static void
mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	union {
		uint32_t w32[2];
		uint64_t w64;
	} cs;
	void *reg_addr;

	wq->sq_ci = ci + 1;
	cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
			(wq->wqes[ci & (wq->sq_size - 1)].ctrl[0]) |
			(ci - 1) << 8);
	cs.w32[1] = wq->wqes[ci & (wq->sq_size - 1)].ctrl[1];
	/* Update SQ doorbell record with new SQ ci. */
	rte_compiler_barrier();
	*wq->sq_dbrec = rte_cpu_to_be_32(wq->sq_ci);
	/* Make sure the doorbell record is updated. */
	rte_wmb();
	/* Write to doorbell register to start processing. */
	reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar);
	__mlx5_uar_write64_relaxed(cs.w64, reg_addr, NULL);
	rte_wmb();
}

static void
mlx5_txpp_fill_cqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	uint32_t i;

	for (i = 0; i < MLX5_TXPP_REARM_CQ_SIZE; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
}

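/*
 * The Rearm Queue is filled with pairs of WQEs: a SEND_EN that enables the
 * next batch of Clock Queue WQEs, followed by a WAIT on the Clock Queue CQ
 * that defers further progress until enough completions have happened.
 * The index arithmetic below is a sketch of that scheme as coded in this
 * routine; the exact phase offsets depend on MLX5_TXPP_REARM and on the
 * WQ/CQ index widths.
 */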
static void
mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	uint32_t i;

	for (i = 0; i < wq->sq_size; i += 2) {
		struct mlx5_wqe_cseg *cs;
		struct mlx5_wqe_qseg *qs;
		uint32_t index;

		/* Build SEND_EN request with slave WQE index. */
		cs = &wqe[i + 0].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ALWAYS <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) &
			((1 << MLX5_WQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.sq->id);
		/* Build WAIT request with slave CQE index. */
		cs = &wqe[i + 1].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) &
			((1 << MLX5_CQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.cq->id);
	}
}

/* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */
static int
mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_REARM_CQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create completion queue object for Rearm Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_REARM_CQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Rearm Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	wq->arm_sn = 0;
	/* Mark all CQEs initially as invalid. */
	mlx5_txpp_fill_cqe_rearm_queue(sh);
	/*
	 * Allocate memory buffer for Send Queue WQEs.
	 * There should be no WQE leftovers in the cyclic queue.
	 */
	wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE;
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create send queue object for Rearm Queue. */
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.cqn = wq->cq->id;
	sq_attr.cd_master = 1;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Rearm Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before going to Ready state. */
	mlx5_txpp_fill_wqe_rearm_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Rearm Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_rearm_queue(sh);
	rte_errno = -ret;
	return ret;
}

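/*
 * The Clock Queue WQEs are prepared once and reused forever: in normal
 * mode they are NOPs that never reach the wire, in test mode each WQE
 * carries an inline UDP test packet so that pacing can be observed on
 * the wire. The pattern built below for the first WQE is simply copied
 * to all the remaining WQEs of the cyclic queue.
 */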
static void
mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	struct mlx5_wqe_cseg *cs = &wqe->cseg;
	uint32_t wqe_size, opcode, i;
	uint8_t *dst;

	/* For test purposes fill the WQ with SEND inline packet. */
	if (sh->txpp.test) {
		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
				     MLX5_WQE_CSEG_SIZE +
				     2 * MLX5_WQE_ESEG_SIZE -
				     MLX5_ESEG_MIN_INLINE_SIZE,
				     MLX5_WSEG_SIZE);
		opcode = MLX5_OPCODE_SEND;
	} else {
		wqe_size = MLX5_WSEG_SIZE;
		opcode = MLX5_OPCODE_NOP;
	}
	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
	cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) |
				     (wqe_size / MLX5_WSEG_SIZE));
	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE);
	if (sh->txpp.test) {
		struct mlx5_wqe_eseg *es = &wqe->eseg;
		struct rte_ether_hdr *eth_hdr;
		struct rte_ipv4_hdr *ip_hdr;
		struct rte_udp_hdr *udp_hdr;

		/* Build the inline test packet pattern. */
		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
			    (sizeof(struct rte_ether_hdr) +
			     sizeof(struct rte_ipv4_hdr)));
		es->flags = 0;
		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		es->swp_offs = 0;
		es->metadata = 0;
		es->swp_flags = 0;
		es->mss = 0;
		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
		/* Build test packet L2 header (Ethernet). */
		dst = (uint8_t *)&es->inline_data;
		eth_hdr = (struct rte_ether_hdr *)dst;
		rte_eth_random_addr(&eth_hdr->d_addr.addr_bytes[0]);
		rte_eth_random_addr(&eth_hdr->s_addr.addr_bytes[0]);
		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		/* Build test packet L3 header (IP v4). */
		dst += sizeof(struct rte_ether_hdr);
		ip_hdr = (struct rte_ipv4_hdr *)dst;
		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
		ip_hdr->type_of_service = 0;
		ip_hdr->fragment_offset = 0;
		ip_hdr->time_to_live = 64;
		ip_hdr->next_proto_id = IPPROTO_UDP;
		ip_hdr->packet_id = 0;
		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
						sizeof(struct rte_ether_hdr));
		/* Use RFC5735 / RFC2544 reserved network test addresses. */
		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 1);
		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 2);
		if (MLX5_TXPP_TEST_PKT_SIZE <
		    (sizeof(struct rte_ether_hdr) +
		     sizeof(struct rte_ipv4_hdr) +
		     sizeof(struct rte_udp_hdr)))
			goto wcopy;
		/* Build test packet L4 header (UDP). */
		dst += sizeof(struct rte_ipv4_hdr);
		udp_hdr = (struct rte_udp_hdr *)dst;
		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
		udp_hdr->dst_port = RTE_BE16(9);
		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
					      sizeof(struct rte_ether_hdr) -
					      sizeof(struct rte_ipv4_hdr));
		udp_hdr->dgram_cksum = 0;
		/* Fill the test packet data. */
		dst += sizeof(struct rte_udp_hdr);
		for (i = sizeof(struct rte_ether_hdr) +
			 sizeof(struct rte_ipv4_hdr) +
			 sizeof(struct rte_udp_hdr);
		     i < MLX5_TXPP_TEST_PKT_SIZE; i++)
			*dst++ = (uint8_t)(i & 0xFF);
	}
wcopy:
	/* Duplicate the pattern to the next WQEs. */
	dst = (uint8_t *)(uintptr_t)wq->sq_buf;
	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
		dst += wqe_size;
		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size);
	}
}

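/*
 * Notes on the Clock Queue created below (as configured in this routine):
 * it is bound to the packet pacing rate limit index, runs with a static
 * WQ (the same WQEs are re-executed forever), and in normal mode is a
 * "non wire" queue, so the NOP WQEs only generate completions. Its CQ is
 * created with use_first_only/overrun_ignore, so the hardware keeps
 * overwriting a single CQE with the latest timestamp and WQE counter.
 */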
/* Creates the Clock Queue for packet pacing, returns zero on success. */
static int
mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				   MLX5_TXPP_REARM_SQ_SIZE *
				   sizeof(struct mlx5_txpp_ts),
				   0, sh->numa_node);
	if (!sh->txpp.tsa) {
		DRV_LOG(ERR, "Failed to allocate memory for CQ stats.");
		return -ENOMEM;
	}
	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create completion queue object for Clock Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.use_first_only = 1;
	cq_attr.overrun_ignore = 1;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	/* Allocate memory buffer for Send Queue WQEs. */
	if (sh->txpp.test) {
		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
					MLX5_WQE_CSEG_SIZE +
					2 * MLX5_WQE_ESEG_SIZE -
					MLX5_ESEG_MIN_INLINE_SIZE,
					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
	} else {
		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
	}
	/* There should not be WQE leftovers in the cyclic queue. */
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create send queue object for Clock Queue. */
	if (sh->txpp.test) {
		sq_attr.tis_lst_sz = 1;
		sq_attr.tis_num = sh->tis->id;
		sq_attr.non_wire = 0;
		sq_attr.static_sq_wq = 1;
	} else {
		sq_attr.non_wire = 1;
		sq_attr.static_sq_wq = 1;
	}
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = wq->cq->id;
	sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
	sq_attr.wq_attr.cd_slave = 1;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	/* umem_offset must be zero for static_sq_wq queue. */
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before going to Ready state. */
	mlx5_txpp_fill_wqe_clock_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	wq->sq_ci = 0;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Clock Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_clock_queue(sh);
	rte_errno = -ret;
	return ret;
}

/* Enable notification from the Rearm Queue CQ. */
static inline void
mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
{
	void *base_addr;

	struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
	uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
	uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
	uint64_t db_be = rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq->id);
	base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
	uint32_t *addr = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL);

	rte_compiler_barrier();
	aq->cq_dbrec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi);
	rte_wmb();
#ifdef RTE_ARCH_64
	*(uint64_t *)addr = db_be;
#else
	*(uint32_t *)addr = db_be;
	rte_io_wmb();
	*((uint32_t *)addr + 1) = db_be >> 32;
#endif
	aq->arm_sn++;
}

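/*
 * The Clock Queue CQ consists of a single CQE which holds both the latest
 * timestamp and the counter of the WQE that produced it in one 16-byte
 * region (see the static_assert on struct mlx5_cqe_ts below). The helper
 * below reads that pair atomically so the timestamp is never combined
 * with a completion index from a different hardware update.
 */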
static inline void
mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts)
{
	/*
	 * The only CQE of Clock Queue is being continuously
	 * updated by hardware with the specified rate. We have to
	 * read timestamp and WQE completion index atomically.
	 */
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
	rte_int128_t src;

	memset(&src, 0, sizeof(src));
	*ts = src;
	/* if (*from == *ts) *from = src; else *ts = *from; */
	rte_atomic128_cmp_exchange(from, ts, &src, 0,
				   __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#else
	rte_atomic64_t *cqe = (rte_atomic64_t *)from;

	/* Power architecture does not support 16B compare-and-swap. */
	for (;;) {
		int64_t tm, op;
		int64_t *ps;

		rte_compiler_barrier();
		tm = rte_atomic64_read(cqe + 0);
		op = rte_atomic64_read(cqe + 1);
		rte_compiler_barrier();
		if (tm != rte_atomic64_read(cqe + 0))
			continue;
		if (op != rte_atomic64_read(cqe + 1))
			continue;
		ps = (int64_t *)ts;
		ps[0] = tm;
		ps[1] = op;
		return;
	}
#endif
}

/* Stores timestamp in the cache structure to share data with datapath. */
static inline void
mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh,
			  uint64_t ts, uint64_t ci)
{
	ci = ci << (64 - MLX5_CQ_INDEX_WIDTH);
	ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH;
	rte_compiler_barrier();
	rte_atomic64_set(&sh->txpp.ts.ts, ts);
	rte_atomic64_set(&sh->txpp.ts.ci_ts, ci);
	rte_wmb();
}

/* Reads timestamp from Clock Queue CQE and stores in the cache. */
static inline void
mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	union {
		rte_int128_t u128;
		struct mlx5_cqe_ts cts;
	} to;
	uint64_t ts;
	uint16_t ci;

	static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t),
		      "Wrong timestamp CQE part size");
	mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
	if (to.cts.op_own >> 4) {
		DRV_LOG(DEBUG, "Clock Queue error sync lost.");
		rte_atomic32_inc(&sh->txpp.err_clock_queue);
		sh->txpp.sync_lost = 1;
		return;
	}
	ci = rte_be_to_cpu_16(to.cts.wqe_counter);
	ts = rte_be_to_cpu_64(to.cts.timestamp);
	ts = mlx5_txpp_convert_rx_ts(sh, ts);
	wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX;
	wq->sq_ci = ci;
	mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci);
}

/* Waits for the first completion on Clock Queue to init timestamp. */
static inline void
mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	uint32_t wait;

	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) {
		struct timespec onems;

		mlx5_txpp_update_timestamp(sh);
		if (wq->sq_ci)
			return;
		/* Wait one millisecond and try again. */
		onems.tv_sec = 0;
		onems.tv_nsec = NS_PER_S / MS_PER_S;
		nanosleep(&onems, 0);
	}
	DRV_LOG(ERR, "Unable to initialize timestamp.");
	sh->txpp.sync_lost = 1;
}

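/*
 * The handlers below are only compiled when DevX events are available;
 * they run from the interrupt service routine. On every Rearm Queue
 * completion the current Clock Queue timestamp is stored into the
 * circular sh->txpp.tsa[] array, which the xstats code later uses to
 * estimate jitter and wander.
 */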
#ifdef HAVE_IBV_DEVX_EVENT
/* Gather statistics for timestamp from Clock Queue CQE. */
static inline void
mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	/* Check whether we have a valid timestamp. */
	if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n)
		return;
	MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE);
	sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts;
	if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE)
		sh->txpp.ts_p = 0;
	if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE)
		++sh->txpp.ts_n;
}

/* Handles Rearm Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	uint32_t cq_ci = wq->cq_ci;
	bool error = false;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &wq->cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)];
		ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci);
		switch (ret) {
		case MLX5_CQE_STATUS_ERR:
			error = true;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_SW_OWN:
			wq->sq_ci += 2;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_HW_OWN:
			break;
		default:
			MLX5_ASSERT(false);
			break;
		}
	} while (ret != MLX5_CQE_STATUS_HW_OWN);
	if (likely(cq_ci != wq->cq_ci)) {
		/* Check whether we have missed interrupts. */
		if (cq_ci - wq->cq_ci != 1) {
			DRV_LOG(DEBUG, "Rearm Queue missed interrupt.");
			rte_atomic32_inc(&sh->txpp.err_miss_int);
			/* Check sync lost on wqe index. */
			if (cq_ci - wq->cq_ci >=
			    (((1UL << MLX5_WQ_INDEX_WIDTH) /
			      MLX5_TXPP_REARM) - 1))
				error = 1;
		}
		/* Update doorbell record to notify hardware. */
		rte_compiler_barrier();
		*wq->cq_dbrec = rte_cpu_to_be_32(cq_ci);
		rte_wmb();
		wq->cq_ci = cq_ci;
		/* Fire new requests to Rearm Queue. */
		if (error) {
			DRV_LOG(DEBUG, "Rearm Queue error sync lost.");
			rte_atomic32_inc(&sh->txpp.err_rearm_queue);
			sh->txpp.sync_lost = 1;
		}
	}
}

/* Handles Clock Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_update_timestamp(sh);
	mlx5_txpp_gather_timestamp(sh);
}
#endif

/* Invoked periodically on Rearm Queue completions. */
void
mlx5_txpp_interrupt_handler(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_EVENT
	RTE_SET_USED(cb_arg);
	return;
#else
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Process events in the loop. Only rearm completions are expected. */
	while (mlx5_glue->devx_get_event
				(sh->txpp.echan,
				 &out.event_resp,
				 sizeof(out.buf)) >=
				 (ssize_t)sizeof(out.event_resp.cookie)) {
		mlx5_txpp_handle_rearm_queue(sh);
		mlx5_txpp_handle_clock_queue(sh);
		mlx5_txpp_cq_arm(sh);
		mlx5_txpp_doorbell_rearm_queue
					(sh, sh->txpp.rearm_queue.sq_ci - 1);
	}
#endif /* HAVE_IBV_DEVX_EVENT */
}

static void
mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->txpp.intr_handle.fd)
		return;
	mlx5_intr_callback_unregister(&sh->txpp.intr_handle,
				      mlx5_txpp_interrupt_handler, sh);
	sh->txpp.intr_handle.fd = 0;
}

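/*
 * The service start sequence below is: switch the event channel FD to
 * non-blocking mode, register the interrupt handler on it, subscribe to
 * the Rearm Queue CQ completion event, arm the CQ, ring the Rearm Queue
 * doorbell to kick the first SEND_EN/WAIT pair, and finally wait for the
 * first Clock Queue completion to seed the cached timestamp.
 */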
/* Attach interrupt handler and fire the first request to Rearm Queue. */
static int
mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh)
{
	uint16_t event_nums[1] = {0};
	int ret;
	int fd;

	rte_atomic32_set(&sh->txpp.err_miss_int, 0);
	rte_atomic32_set(&sh->txpp.err_rearm_queue, 0);
	rte_atomic32_set(&sh->txpp.err_clock_queue, 0);
	rte_atomic32_set(&sh->txpp.err_ts_past, 0);
	rte_atomic32_set(&sh->txpp.err_ts_future, 0);
	/* Attach interrupt handler to process Rearm Queue completions. */
	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
	ret = mlx5_os_set_nonblock_channel_fd(fd);
	if (ret) {
		DRV_LOG(ERR, "Failed to change event channel FD.");
		rte_errno = errno;
		return -rte_errno;
	}
	memset(&sh->txpp.intr_handle, 0, sizeof(sh->txpp.intr_handle));
	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
	sh->txpp.intr_handle.fd = fd;
	sh->txpp.intr_handle.type = RTE_INTR_HANDLE_EXT;
	if (rte_intr_callback_register(&sh->txpp.intr_handle,
				       mlx5_txpp_interrupt_handler, sh)) {
		sh->txpp.intr_handle.fd = 0;
		DRV_LOG(ERR, "Failed to register CQE interrupt %d.",
			rte_errno);
		return -rte_errno;
	}
	/* Subscribe CQ event to the event channel controlled by the driver. */
	ret = mlx5_glue->devx_subscribe_devx_event(sh->txpp.echan,
						   sh->txpp.rearm_queue.cq->obj,
						   sizeof(event_nums),
						   event_nums, 0);
	if (ret) {
		DRV_LOG(ERR, "Failed to subscribe CQE event.");
		rte_errno = errno;
		return -errno;
	}
	/* Enable interrupts in the CQ. */
	mlx5_txpp_cq_arm(sh);
	/* Fire the first request on Rearm Queue. */
	mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1);
	mlx5_txpp_init_timestamp(sh);
	return 0;
}

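/*
 * The tx_pp devarg is interpreted as follows (per the code below): its
 * absolute value is the scheduling tick granularity in nanoseconds, and a
 * negative value additionally enables the test mode where the Clock Queue
 * sends real packets to the wire instead of NOPs.
 */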
/*
 * The routine initializes the packet pacing infrastructure:
 * - allocates PP context
 * - Clock CQ/SQ
 * - Rearm CQ/SQ
 * - attaches rearm interrupt handler
 * - starts Clock Queue
 *
 * Returns 0 on success, negative otherwise
 */
static int
mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv)
{
	int tx_pp = priv->config.tx_pp;
	int ret;

	/* Store the requested pacing parameters. */
	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
	sh->txpp.test = !!(tx_pp < 0);
	sh->txpp.skew = priv->config.tx_skew;
	sh->txpp.freq = priv->config.hca_attr.dev_freq_khz;
	ret = mlx5_txpp_create_event_channel(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_alloc_pp_index(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_clock_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_rearm_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_start_service(sh);
	if (ret)
		goto exit;
exit:
	if (ret) {
		mlx5_txpp_stop_service(sh);
		mlx5_txpp_destroy_rearm_queue(sh);
		mlx5_txpp_destroy_clock_queue(sh);
		mlx5_txpp_free_pp_index(sh);
		mlx5_txpp_destroy_event_channel(sh);
		sh->txpp.tick = 0;
		sh->txpp.test = 0;
		sh->txpp.skew = 0;
	}
	return ret;
}

/*
 * The routine destroys the packet pacing infrastructure:
 * - detaches rearm interrupt handler
 * - Rearm CQ/SQ
 * - Clock CQ/SQ
 * - PP context
 */
static void
mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_stop_service(sh);
	mlx5_txpp_destroy_rearm_queue(sh);
	mlx5_txpp_destroy_clock_queue(sh);
	mlx5_txpp_free_pp_index(sh);
	mlx5_txpp_destroy_event_channel(sh);
	sh->txpp.tick = 0;
	sh->txpp.test = 0;
	sh->txpp.skew = 0;
}

/**
 * Creates and starts packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_txpp_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int err = 0;
	int ret;

	if (!priv->config.tx_pp) {
		/* Packet pacing is not requested for the device. */
		MLX5_ASSERT(priv->txpp_en == 0);
		return 0;
	}
	if (priv->txpp_en) {
		/* Packet pacing is already enabled for the device. */
		MLX5_ASSERT(sh->txpp.refcnt);
		return 0;
	}
	if (priv->config.tx_pp > 0) {
		ret = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
		if (ret < 0)
			return 0;
	}
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	if (sh->txpp.refcnt) {
		priv->txpp_en = 1;
		++sh->txpp.refcnt;
	} else {
		err = mlx5_txpp_create(sh, priv);
		if (!err) {
			MLX5_ASSERT(sh->txpp.tick);
			priv->txpp_en = 1;
			sh->txpp.refcnt = 1;
		} else {
			rte_errno = -err;
		}
	}
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	return err;
}

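/*
 * Note that the packet pacing infrastructure lives in the shared device
 * context: ports created on the same IB context share one Clock/Rearm
 * Queue pair, so start/stop only create or destroy it on the 0<->1
 * transitions of sh->txpp.refcnt, serialized by sh->txpp.mutex.
 */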
/**
 * Stops and destroys packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_txpp_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (!priv->txpp_en) {
		/* Packet pacing is already disabled for the device. */
		return;
	}
	priv->txpp_en = 0;
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	MLX5_ASSERT(sh->txpp.refcnt);
	if (!sh->txpp.refcnt || --sh->txpp.refcnt) {
		/* Still referenced, keep the infrastructure running. */
		ret = pthread_mutex_unlock(&sh->txpp.mutex);
		MLX5_ASSERT(!ret);
		RTE_SET_USED(ret);
		return;
	}
	/* No references any more, do actual destroy. */
	mlx5_txpp_destroy(sh);
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
}

/*
 * Read the current clock counter of an Ethernet device.
 *
 * This returns the current raw clock value of an Ethernet device. It is
 * a raw amount of ticks, with no given time reference.
 * The value returned here is from the same clock as the one
 * filling the timestamp field of Rx/Tx packets when using hardware
 * timestamp offload. Therefore it can be used to compute a precise
 * conversion of the device clock to the real time.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param timestamp
 *   Pointer to the uint64_t that holds the raw clock value.
 *
 * @return
 *   - 0: Success.
 *   - -ENOTSUP: The function is not supported in this mode. Requires
 *     packet pacing module configured and started (tx_pp devarg)
 */
int
mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (sh->txpp.refcnt) {
		struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
		struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
		union {
			rte_int128_t u128;
			struct mlx5_cqe_ts cts;
		} to;
		uint64_t ts;

		mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
		if (to.cts.op_own >> 4) {
			DRV_LOG(DEBUG, "Clock Queue error sync lost.");
			rte_atomic32_inc(&sh->txpp.err_clock_queue);
			sh->txpp.sync_lost = 1;
			return -EIO;
		}
		ts = rte_be_to_cpu_64(to.cts.timestamp);
		ts = mlx5_txpp_convert_rx_ts(sh, ts);
		*timestamp = ts;
		return 0;
	}
	/* Not supported in isolated mode - kernel does not see the CQEs. */
	if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -ENOTSUP;
	ret = mlx5_read_clock(dev, timestamp);
	return ret;
}

/**
 * DPDK callback to clear device extended statistics.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success and stats is reset, negative errno value otherwise and
 *   rte_errno is set.
 */
int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	rte_atomic32_set(&sh->txpp.err_miss_int, 0);
	rte_atomic32_set(&sh->txpp.err_rearm_queue, 0);
	rte_atomic32_set(&sh->txpp.err_clock_queue, 0);
	rte_atomic32_set(&sh->txpp.err_ts_past, 0);
	rte_atomic32_set(&sh->txpp.err_ts_future, 0);
	return 0;
}

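/*
 * The order of the names in mlx5_txpp_stat_names[] defines the xstats
 * layout: mlx5_txpp_xstats_get() below fills the values by the same
 * positional indices, so the two must be kept in sync when adding
 * counters.
 */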
/**
 * Routine to retrieve names of extended device statistics
 * for packet send scheduling. It appends the specific stats names
 * after the parts filled by preceding modules (eth stats, etc.)
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] xstats_names
 *   Buffer to insert names into.
 * @param n
 *   Number of names.
 * @param n_used
 *   Number of names filled by preceding statistics modules.
 *
 * @return
 *   Number of xstats names.
 */
int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			       struct rte_eth_xstat_name *xstats_names,
			       unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);
	unsigned int i;

	if (n >= n_used + n_txpp && xstats_names) {
		for (i = 0; i < n_txpp; ++i) {
			strncpy(xstats_names[i + n_used].name,
				mlx5_txpp_stat_names[i],
				RTE_ETH_XSTATS_NAME_SIZE);
			xstats_names[i + n_used].name
					[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
		}
	}
	return n_used + n_txpp;
}

static inline void
mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp,
		   struct mlx5_txpp_ts *tsa, uint16_t idx)
{
	do {
		int64_t ts, ci;

		ts = rte_atomic64_read(&txpp->tsa[idx].ts);
		ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts);
		rte_compiler_barrier();
		if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0)
			continue;
		if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts)
			continue;
		if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci)
			continue;
		rte_atomic64_set(&tsa->ts, ts);
		rte_atomic64_set(&tsa->ci_ts, ci);
		return;
	} while (true);
}

/*
 * Jitter reflects the clock change between
 * neighboring Clock Queue completions.
 */
static uint64_t
mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp)
{
	struct mlx5_txpp_ts tsa0, tsa1;
	int64_t dts, dci;
	uint16_t ts_p;

	if (txpp->ts_n < 2) {
		/* Not enough reports gathered yet. */
		return 0;
	}
	do {
		int ts_0, ts_1;

		ts_p = txpp->ts_p;
		rte_compiler_barrier();
		ts_0 = ts_p - 2;
		if (ts_0 < 0)
			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
		ts_1 = ts_p - 1;
		if (ts_1 < 0)
			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
		rte_compiler_barrier();
	} while (ts_p != txpp->ts_p);
	/* We have two neighbor reports, calculate the jitter. */
	dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts);
	dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH));
	if (dci < 0)
		dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}

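/*
 * Roughly, both estimators compare the elapsed device time between two
 * stored reports with the time expected from the completion counters:
 *   expected = (ci_1 - ci_0) * tick;  result = |(ts_1 - ts_0) - expected|
 * Jitter uses two adjacent reports, while wander uses reports half of the
 * completion history apart, so it captures the long-term drift.
 */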
/*
 * Wander reflects the long-term clock change
 * over the entire length of all Clock Queue completions.
 */
static uint64_t
mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp)
{
	struct mlx5_txpp_ts tsa0, tsa1;
	int64_t dts, dci;
	uint16_t ts_p;

	if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) {
		/* Not enough reports gathered yet. */
		return 0;
	}
	do {
		int ts_0, ts_1;

		ts_p = txpp->ts_p;
		rte_compiler_barrier();
		ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1;
		if (ts_0 < 0)
			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
		ts_1 = ts_p - 1;
		if (ts_1 < 0)
			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
		rte_compiler_barrier();
	} while (ts_p != txpp->ts_p);
	/* We have two distant reports, calculate the wander. */
	dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts);
	dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH));
	dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}

/**
 * Routine to retrieve extended device statistics
 * for packet send scheduling. It appends the specific statistics
 * after the parts filled by preceding modules (eth stats, etc.)
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] stats
 *   Pointer to rte extended stats table.
 * @param n
 *   The size of the stats table.
 * @param n_used
 *   Number of stats filled by preceding statistics modules.
 *
 * @return
 *   Number of extended stats on success and stats is filled,
 *   negative on error and rte_errno is set.
 */
int
mlx5_txpp_xstats_get(struct rte_eth_dev *dev,
		     struct rte_eth_xstat *stats,
		     unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);

	if (n >= n_used + n_txpp && stats) {
		struct mlx5_priv *priv = dev->data->dev_private;
		struct mlx5_dev_ctx_shared *sh = priv->sh;
		unsigned int i;

		for (i = 0; i < n_txpp; ++i)
			stats[n_used + i].id = n_used + i;
		stats[n_used + 0].value =
			rte_atomic32_read(&sh->txpp.err_miss_int);
		stats[n_used + 1].value =
			rte_atomic32_read(&sh->txpp.err_rearm_queue);
		stats[n_used + 2].value =
			rte_atomic32_read(&sh->txpp.err_clock_queue);
		stats[n_used + 3].value =
			rte_atomic32_read(&sh->txpp.err_ts_past);
		stats[n_used + 4].value =
			rte_atomic32_read(&sh->txpp.err_ts_future);
		stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp);
		stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp);
		stats[n_used + 7].value = sh->txpp.sync_lost;
	}
	return n_used + n_txpp;
}