1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 #include <fcntl.h> 5 #include <stdint.h> 6 7 #include <rte_ether.h> 8 #include <rte_ethdev_driver.h> 9 #include <rte_interrupts.h> 10 #include <rte_alarm.h> 11 #include <rte_malloc.h> 12 #include <rte_cycles.h> 13 #include <rte_eal_paging.h> 14 15 #include <mlx5_malloc.h> 16 17 #include "mlx5.h" 18 #include "mlx5_rxtx.h" 19 #include "mlx5_common_os.h" 20 21 static const char * const mlx5_txpp_stat_names[] = { 22 "txpp_err_miss_int", /* Missed service interrupt. */ 23 "txpp_err_rearm_queue", /* Rearm Queue errors. */ 24 "txpp_err_clock_queue", /* Clock Queue errors. */ 25 "txpp_err_ts_past", /* Timestamp in the past. */ 26 "txpp_err_ts_future", /* Timestamp in the distant future. */ 27 "txpp_jitter", /* Timestamp jitter (one Clock Queue completion). */ 28 "txpp_wander", /* Timestamp jitter (half of Clock Queue completions). */ 29 "txpp_sync_lost", /* Scheduling synchronization lost. */ 30 }; 31 32 /* Destroy Event Queue Notification Channel. */ 33 static void 34 mlx5_txpp_destroy_eqn(struct mlx5_dev_ctx_shared *sh) 35 { 36 if (sh->txpp.echan) { 37 mlx5_glue->devx_destroy_event_channel(sh->txpp.echan); 38 sh->txpp.echan = NULL; 39 } 40 sh->txpp.eqn = 0; 41 } 42 43 /* Create Event Queue Notification Channel. */ 44 static int 45 mlx5_txpp_create_eqn(struct mlx5_dev_ctx_shared *sh) 46 { 47 uint32_t lcore; 48 49 MLX5_ASSERT(!sh->txpp.echan); 50 lcore = (uint32_t)rte_lcore_to_cpu_id(-1); 51 if (mlx5_glue->devx_query_eqn(sh->ctx, lcore, &sh->txpp.eqn)) { 52 rte_errno = errno; 53 DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno); 54 sh->txpp.eqn = 0; 55 return -rte_errno; 56 } 57 sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx, 58 MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA); 59 if (!sh->txpp.echan) { 60 sh->txpp.eqn = 0; 61 rte_errno = errno; 62 DRV_LOG(ERR, "Failed to create event channel %d.", 63 rte_errno); 64 return -rte_errno; 65 } 66 return 0; 67 } 68 69 static void 70 mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh) 71 { 72 if (sh->txpp.pp) { 73 mlx5_glue->dv_free_pp(sh->txpp.pp); 74 sh->txpp.pp = NULL; 75 sh->txpp.pp_id = 0; 76 } 77 } 78 79 /* Allocate Packet Pacing index from kernel via mlx5dv call. */ 80 static int 81 mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh) 82 { 83 #ifdef HAVE_MLX5DV_PP_ALLOC 84 uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)]; 85 uint64_t rate; 86 87 MLX5_ASSERT(!sh->txpp.pp); 88 memset(&pp, 0, sizeof(pp)); 89 rate = NS_PER_S / sh->txpp.tick; 90 if (rate * sh->txpp.tick != NS_PER_S) 91 DRV_LOG(WARNING, "Packet pacing frequency is not precise."); 92 if (sh->txpp.test) { 93 uint32_t len; 94 95 len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE, 96 (size_t)RTE_ETHER_MIN_LEN); 97 MLX5_SET(set_pp_rate_limit_context, &pp, 98 burst_upper_bound, len); 99 MLX5_SET(set_pp_rate_limit_context, &pp, 100 typical_packet_size, len); 101 /* Convert packets per second into kilobits. */ 102 rate = (rate * len) / (1000ul / CHAR_BIT); 103 DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate); 104 } 105 MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate); 106 MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode, 107 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE); 108 sh->txpp.pp = mlx5_glue->dv_alloc_pp 109 (sh->ctx, sizeof(pp), &pp, 110 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX); 111 if (sh->txpp.pp == NULL) { 112 DRV_LOG(ERR, "Failed to allocate packet pacing index."); 113 rte_errno = errno; 114 return -errno; 115 } 116 if (!((struct mlx5dv_pp *)sh->txpp.pp)->index) { 117 DRV_LOG(ERR, "Zero packet pacing index allocated."); 118 mlx5_txpp_free_pp_index(sh); 119 rte_errno = ENOTSUP; 120 return -ENOTSUP; 121 } 122 sh->txpp.pp_id = ((struct mlx5dv_pp *)(sh->txpp.pp))->index; 123 return 0; 124 #else 125 RTE_SET_USED(sh); 126 DRV_LOG(ERR, "Allocating pacing index is not supported."); 127 rte_errno = ENOTSUP; 128 return -ENOTSUP; 129 #endif 130 } 131 132 static void 133 mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq) 134 { 135 if (wq->sq) 136 claim_zero(mlx5_devx_cmd_destroy(wq->sq)); 137 if (wq->sq_umem) 138 claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem)); 139 if (wq->sq_buf) 140 mlx5_free((void *)(uintptr_t)wq->sq_buf); 141 if (wq->cq) 142 claim_zero(mlx5_devx_cmd_destroy(wq->cq)); 143 if (wq->cq_umem) 144 claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem)); 145 if (wq->cq_buf) 146 mlx5_free((void *)(uintptr_t)wq->cq_buf); 147 memset(wq, 0, sizeof(*wq)); 148 } 149 150 static void 151 mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh) 152 { 153 struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue; 154 155 mlx5_txpp_destroy_send_queue(wq); 156 } 157 158 static void 159 mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh) 160 { 161 struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue; 162 163 mlx5_txpp_destroy_send_queue(wq); 164 if (sh->txpp.tsa) { 165 mlx5_free(sh->txpp.tsa); 166 sh->txpp.tsa = NULL; 167 } 168 } 169 170 static void 171 mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci) 172 { 173 struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue; 174 union { 175 uint32_t w32[2]; 176 uint64_t w64; 177 } cs; 178 void *reg_addr; 179 180 wq->sq_ci = ci + 1; 181 cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32 182 (wq->wqes[ci & (wq->sq_size - 1)].ctrl[0]) | (ci - 1) << 8); 183 cs.w32[1] = wq->wqes[ci & (wq->sq_size - 1)].ctrl[1]; 184 /* Update SQ doorbell record with new SQ ci. */ 185 rte_compiler_barrier(); 186 *wq->sq_dbrec = rte_cpu_to_be_32(wq->sq_ci); 187 /* Make sure the doorbell record is updated. */ 188 rte_wmb(); 189 /* Write to doorbel register to start processing. */ 190 reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar); 191 __mlx5_uar_write64_relaxed(cs.w64, reg_addr, NULL); 192 rte_wmb(); 193 } 194 195 static void 196 mlx5_txpp_fill_cqe_rearm_queue(struct mlx5_dev_ctx_shared *sh) 197 { 198 struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue; 199 struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes; 200 uint32_t i; 201 202 for (i = 0; i < MLX5_TXPP_REARM_CQ_SIZE; i++) { 203 cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK; 204 ++cqe; 205 } 206 } 207 208 static void 209 mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh) 210 { 211 struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue; 212 struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes; 213 uint32_t i; 214 215 for (i = 0; i < wq->sq_size; i += 2) { 216 struct mlx5_wqe_cseg *cs; 217 struct mlx5_wqe_qseg *qs; 218 uint32_t index; 219 220 /* Build SEND_EN request with slave WQE index. */ 221 cs = &wqe[i + 0].cseg; 222 cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0); 223 cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2); 224 cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << 225 MLX5_COMP_MODE_OFFSET); 226 cs->misc = RTE_BE32(0); 227 qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg)); 228 index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) & 229 ((1 << MLX5_WQ_INDEX_WIDTH) - 1); 230 qs->max_index = rte_cpu_to_be_32(index); 231 qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.sq->id); 232 /* Build WAIT request with slave CQE index. */ 233 cs = &wqe[i + 1].cseg; 234 cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0); 235 cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2); 236 cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR << 237 MLX5_COMP_MODE_OFFSET); 238 cs->misc = RTE_BE32(0); 239 qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg)); 240 index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) & 241 ((1 << MLX5_CQ_INDEX_WIDTH) - 1); 242 qs->max_index = rte_cpu_to_be_32(index); 243 qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.cq->id); 244 } 245 } 246 247 /* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */ 248 static int 249 mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh) 250 { 251 struct mlx5_devx_create_sq_attr sq_attr = { 0 }; 252 struct mlx5_devx_modify_sq_attr msq_attr = { 0 }; 253 struct mlx5_devx_cq_attr cq_attr = { 0 }; 254 struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue; 255 size_t page_size; 256 uint32_t umem_size, umem_dbrec; 257 int ret; 258 259 page_size = rte_mem_page_size(); 260 if (page_size == (size_t)-1) { 261 DRV_LOG(ERR, "Failed to get mem page size"); 262 return -ENOMEM; 263 } 264 /* Allocate memory buffer for CQEs and doorbell record. */ 265 umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_REARM_CQ_SIZE; 266 umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE); 267 umem_size += MLX5_DBR_SIZE; 268 wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size, 269 page_size, sh->numa_node); 270 if (!wq->cq_buf) { 271 DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue."); 272 return -ENOMEM; 273 } 274 /* Register allocated buffer in user space with DevX. */ 275 wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx, 276 (void *)(uintptr_t)wq->cq_buf, 277 umem_size, 278 IBV_ACCESS_LOCAL_WRITE); 279 if (!wq->cq_umem) { 280 rte_errno = errno; 281 DRV_LOG(ERR, "Failed to register umem for Rearm Queue."); 282 goto error; 283 } 284 /* Create completion queue object for Rearm Queue. */ 285 cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ? 286 MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B; 287 cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar); 288 cq_attr.eqn = sh->txpp.eqn; 289 cq_attr.q_umem_valid = 1; 290 cq_attr.q_umem_offset = 0; 291 cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem); 292 cq_attr.db_umem_valid = 1; 293 cq_attr.db_umem_offset = umem_dbrec; 294 cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem); 295 cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_REARM_CQ_SIZE); 296 cq_attr.log_page_size = rte_log2_u32(page_size); 297 wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr); 298 if (!wq->cq) { 299 rte_errno = errno; 300 DRV_LOG(ERR, "Failed to create CQ for Rearm Queue."); 301 goto error; 302 } 303 wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec); 304 wq->cq_ci = 0; 305 wq->arm_sn = 0; 306 /* Mark all CQEs initially as invalid. */ 307 mlx5_txpp_fill_cqe_rearm_queue(sh); 308 /* 309 * Allocate memory buffer for Send Queue WQEs. 310 * There should be no WQE leftovers in the cyclic queue. 311 */ 312 wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE; 313 MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size))); 314 umem_size = MLX5_WQE_SIZE * wq->sq_size; 315 umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE); 316 umem_size += MLX5_DBR_SIZE; 317 wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size, 318 page_size, sh->numa_node); 319 if (!wq->sq_buf) { 320 DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue."); 321 rte_errno = ENOMEM; 322 goto error; 323 } 324 /* Register allocated buffer in user space with DevX. */ 325 wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx, 326 (void *)(uintptr_t)wq->sq_buf, 327 umem_size, 328 IBV_ACCESS_LOCAL_WRITE); 329 if (!wq->sq_umem) { 330 rte_errno = errno; 331 DRV_LOG(ERR, "Failed to register umem for Rearm Queue."); 332 goto error; 333 } 334 /* Create send queue object for Rearm Queue. */ 335 sq_attr.state = MLX5_SQC_STATE_RST; 336 sq_attr.tis_lst_sz = 1; 337 sq_attr.tis_num = sh->tis->id; 338 sq_attr.cqn = wq->cq->id; 339 sq_attr.cd_master = 1; 340 sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar); 341 sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; 342 sq_attr.wq_attr.pd = sh->pdn; 343 sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE); 344 sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size); 345 sq_attr.wq_attr.dbr_umem_valid = 1; 346 sq_attr.wq_attr.dbr_addr = umem_dbrec; 347 sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem); 348 sq_attr.wq_attr.wq_umem_valid = 1; 349 sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem); 350 sq_attr.wq_attr.wq_umem_offset = 0; 351 wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr); 352 if (!wq->sq) { 353 rte_errno = errno; 354 DRV_LOG(ERR, "Failed to create SQ for Rearm Queue."); 355 goto error; 356 } 357 wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec + 358 MLX5_SND_DBR * sizeof(uint32_t)); 359 /* Build the WQEs in the Send Queue before goto Ready state. */ 360 mlx5_txpp_fill_wqe_rearm_queue(sh); 361 /* Change queue state to ready. */ 362 msq_attr.sq_state = MLX5_SQC_STATE_RST; 363 msq_attr.state = MLX5_SQC_STATE_RDY; 364 ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr); 365 if (ret) { 366 DRV_LOG(ERR, "Failed to set SQ ready state Rearm Queue."); 367 goto error; 368 } 369 return 0; 370 error: 371 ret = -rte_errno; 372 mlx5_txpp_destroy_rearm_queue(sh); 373 rte_errno = -ret; 374 return ret; 375 } 376 377 static void 378 mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh) 379 { 380 struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue; 381 struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes; 382 struct mlx5_wqe_cseg *cs = &wqe->cseg; 383 uint32_t wqe_size, opcode, i; 384 uint8_t *dst; 385 386 /* For test purposes fill the WQ with SEND inline packet. */ 387 if (sh->txpp.test) { 388 wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE + 389 MLX5_WQE_CSEG_SIZE + 390 2 * MLX5_WQE_ESEG_SIZE - 391 MLX5_ESEG_MIN_INLINE_SIZE, 392 MLX5_WSEG_SIZE); 393 opcode = MLX5_OPCODE_SEND; 394 } else { 395 wqe_size = MLX5_WSEG_SIZE; 396 opcode = MLX5_OPCODE_NOP; 397 } 398 cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */ 399 cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 400 (wqe_size / MLX5_WSEG_SIZE)); 401 cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET); 402 cs->misc = RTE_BE32(0); 403 wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE); 404 if (sh->txpp.test) { 405 struct mlx5_wqe_eseg *es = &wqe->eseg; 406 struct rte_ether_hdr *eth_hdr; 407 struct rte_ipv4_hdr *ip_hdr; 408 struct rte_udp_hdr *udp_hdr; 409 410 /* Build the inline test packet pattern. */ 411 MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX); 412 MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >= 413 (sizeof(struct rte_ether_hdr) + 414 sizeof(struct rte_ipv4_hdr))); 415 es->flags = 0; 416 es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; 417 es->swp_offs = 0; 418 es->metadata = 0; 419 es->swp_flags = 0; 420 es->mss = 0; 421 es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE); 422 /* Build test packet L2 header (Ethernet). */ 423 dst = (uint8_t *)&es->inline_data; 424 eth_hdr = (struct rte_ether_hdr *)dst; 425 rte_eth_random_addr(ð_hdr->d_addr.addr_bytes[0]); 426 rte_eth_random_addr(ð_hdr->s_addr.addr_bytes[0]); 427 eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); 428 /* Build test packet L3 header (IP v4). */ 429 dst += sizeof(struct rte_ether_hdr); 430 ip_hdr = (struct rte_ipv4_hdr *)dst; 431 ip_hdr->version_ihl = RTE_IPV4_VHL_DEF; 432 ip_hdr->type_of_service = 0; 433 ip_hdr->fragment_offset = 0; 434 ip_hdr->time_to_live = 64; 435 ip_hdr->next_proto_id = IPPROTO_UDP; 436 ip_hdr->packet_id = 0; 437 ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE - 438 sizeof(struct rte_ether_hdr)); 439 /* use RFC5735 / RFC2544 reserved network test addresses */ 440 ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) | 441 (0 << 8) | 1); 442 ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) | 443 (0 << 8) | 2); 444 if (MLX5_TXPP_TEST_PKT_SIZE < 445 (sizeof(struct rte_ether_hdr) + 446 sizeof(struct rte_ipv4_hdr) + 447 sizeof(struct rte_udp_hdr))) 448 goto wcopy; 449 /* Build test packet L4 header (UDP). */ 450 dst += sizeof(struct rte_ipv4_hdr); 451 udp_hdr = (struct rte_udp_hdr *)dst; 452 udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */ 453 udp_hdr->dst_port = RTE_BE16(9); 454 udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE - 455 sizeof(struct rte_ether_hdr) - 456 sizeof(struct rte_ipv4_hdr)); 457 udp_hdr->dgram_cksum = 0; 458 /* Fill the test packet data. */ 459 dst += sizeof(struct rte_udp_hdr); 460 for (i = sizeof(struct rte_ether_hdr) + 461 sizeof(struct rte_ipv4_hdr) + 462 sizeof(struct rte_udp_hdr); 463 i < MLX5_TXPP_TEST_PKT_SIZE; i++) 464 *dst++ = (uint8_t)(i & 0xFF); 465 } 466 wcopy: 467 /* Duplicate the pattern to the next WQEs. */ 468 dst = (uint8_t *)(uintptr_t)wq->sq_buf; 469 for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) { 470 dst += wqe_size; 471 rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size); 472 } 473 } 474 475 /* Creates the Clock Queue for packet pacing, returns zero on success. */ 476 static int 477 mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh) 478 { 479 struct mlx5_devx_create_sq_attr sq_attr = { 0 }; 480 struct mlx5_devx_modify_sq_attr msq_attr = { 0 }; 481 struct mlx5_devx_cq_attr cq_attr = { 0 }; 482 struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue; 483 size_t page_size; 484 uint32_t umem_size, umem_dbrec; 485 int ret; 486 487 page_size = rte_mem_page_size(); 488 if (page_size == (size_t)-1) { 489 DRV_LOG(ERR, "Failed to get mem page size"); 490 return -ENOMEM; 491 } 492 sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, 493 MLX5_TXPP_REARM_SQ_SIZE * 494 sizeof(struct mlx5_txpp_ts), 495 0, sh->numa_node); 496 if (!sh->txpp.tsa) { 497 DRV_LOG(ERR, "Failed to allocate memory for CQ stats."); 498 return -ENOMEM; 499 } 500 sh->txpp.ts_p = 0; 501 sh->txpp.ts_n = 0; 502 /* Allocate memory buffer for CQEs and doorbell record. */ 503 umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE; 504 umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE); 505 umem_size += MLX5_DBR_SIZE; 506 wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size, 507 page_size, sh->numa_node); 508 if (!wq->cq_buf) { 509 DRV_LOG(ERR, "Failed to allocate memory for Clock Queue."); 510 return -ENOMEM; 511 } 512 /* Register allocated buffer in user space with DevX. */ 513 wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx, 514 (void *)(uintptr_t)wq->cq_buf, 515 umem_size, 516 IBV_ACCESS_LOCAL_WRITE); 517 if (!wq->cq_umem) { 518 rte_errno = errno; 519 DRV_LOG(ERR, "Failed to register umem for Clock Queue."); 520 goto error; 521 } 522 /* Create completion queue object for Clock Queue. */ 523 cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ? 524 MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B; 525 cq_attr.use_first_only = 1; 526 cq_attr.overrun_ignore = 1; 527 cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar); 528 cq_attr.eqn = sh->txpp.eqn; 529 cq_attr.q_umem_valid = 1; 530 cq_attr.q_umem_offset = 0; 531 cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem); 532 cq_attr.db_umem_valid = 1; 533 cq_attr.db_umem_offset = umem_dbrec; 534 cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem); 535 cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE); 536 cq_attr.log_page_size = rte_log2_u32(page_size); 537 wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr); 538 if (!wq->cq) { 539 rte_errno = errno; 540 DRV_LOG(ERR, "Failed to create CQ for Clock Queue."); 541 goto error; 542 } 543 wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec); 544 wq->cq_ci = 0; 545 /* Allocate memory buffer for Send Queue WQEs. */ 546 if (sh->txpp.test) { 547 wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE + 548 MLX5_WQE_CSEG_SIZE + 549 2 * MLX5_WQE_ESEG_SIZE - 550 MLX5_ESEG_MIN_INLINE_SIZE, 551 MLX5_WQE_SIZE) / MLX5_WQE_SIZE; 552 wq->sq_size *= MLX5_TXPP_CLKQ_SIZE; 553 } else { 554 wq->sq_size = MLX5_TXPP_CLKQ_SIZE; 555 } 556 /* There should not be WQE leftovers in the cyclic queue. */ 557 MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size))); 558 umem_size = MLX5_WQE_SIZE * wq->sq_size; 559 umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE); 560 umem_size += MLX5_DBR_SIZE; 561 wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size, 562 page_size, sh->numa_node); 563 if (!wq->sq_buf) { 564 DRV_LOG(ERR, "Failed to allocate memory for Clock Queue."); 565 rte_errno = ENOMEM; 566 goto error; 567 } 568 /* Register allocated buffer in user space with DevX. */ 569 wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx, 570 (void *)(uintptr_t)wq->sq_buf, 571 umem_size, 572 IBV_ACCESS_LOCAL_WRITE); 573 if (!wq->sq_umem) { 574 rte_errno = errno; 575 DRV_LOG(ERR, "Failed to register umem for Clock Queue."); 576 goto error; 577 } 578 /* Create send queue object for Clock Queue. */ 579 if (sh->txpp.test) { 580 sq_attr.tis_lst_sz = 1; 581 sq_attr.tis_num = sh->tis->id; 582 sq_attr.non_wire = 0; 583 sq_attr.static_sq_wq = 1; 584 } else { 585 sq_attr.non_wire = 1; 586 sq_attr.static_sq_wq = 1; 587 } 588 sq_attr.state = MLX5_SQC_STATE_RST; 589 sq_attr.cqn = wq->cq->id; 590 sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id; 591 sq_attr.wq_attr.cd_slave = 1; 592 sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar); 593 sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; 594 sq_attr.wq_attr.pd = sh->pdn; 595 sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE); 596 sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size); 597 sq_attr.wq_attr.dbr_umem_valid = 1; 598 sq_attr.wq_attr.dbr_addr = umem_dbrec; 599 sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem); 600 sq_attr.wq_attr.wq_umem_valid = 1; 601 sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem); 602 /* umem_offset must be zero for static_sq_wq queue. */ 603 sq_attr.wq_attr.wq_umem_offset = 0; 604 wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr); 605 if (!wq->sq) { 606 rte_errno = errno; 607 DRV_LOG(ERR, "Failed to create SQ for Clock Queue."); 608 goto error; 609 } 610 wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec + 611 MLX5_SND_DBR * sizeof(uint32_t)); 612 /* Build the WQEs in the Send Queue before goto Ready state. */ 613 mlx5_txpp_fill_wqe_clock_queue(sh); 614 /* Change queue state to ready. */ 615 msq_attr.sq_state = MLX5_SQC_STATE_RST; 616 msq_attr.state = MLX5_SQC_STATE_RDY; 617 wq->sq_ci = 0; 618 ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr); 619 if (ret) { 620 DRV_LOG(ERR, "Failed to set SQ ready state Clock Queue."); 621 goto error; 622 } 623 return 0; 624 error: 625 ret = -rte_errno; 626 mlx5_txpp_destroy_clock_queue(sh); 627 rte_errno = -ret; 628 return ret; 629 } 630 631 /* Enable notification from the Rearm Queue CQ. */ 632 static inline void 633 mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh) 634 { 635 void *base_addr; 636 637 struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue; 638 uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET; 639 uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci; 640 uint64_t db_be = rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq->id); 641 base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar); 642 uint32_t *addr = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL); 643 644 rte_compiler_barrier(); 645 aq->cq_dbrec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi); 646 rte_wmb(); 647 #ifdef RTE_ARCH_64 648 *(uint64_t *)addr = db_be; 649 #else 650 *(uint32_t *)addr = db_be; 651 rte_io_wmb(); 652 *((uint32_t *)addr + 1) = db_be >> 32; 653 #endif 654 aq->arm_sn++; 655 } 656 657 static inline void 658 mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts) 659 { 660 /* 661 * The only CQE of Clock Queue is being continuously 662 * update by hardware with soecified rate. We have to 663 * read timestump and WQE completion index atomically. 664 */ 665 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) 666 rte_int128_t src; 667 668 memset(&src, 0, sizeof(src)); 669 *ts = src; 670 /* if (*from == *ts) *from = *src else *ts = *from; */ 671 rte_atomic128_cmp_exchange(from, ts, &src, 0, 672 __ATOMIC_RELAXED, __ATOMIC_RELAXED); 673 #else 674 rte_atomic64_t *cqe = (rte_atomic64_t *)from; 675 676 /* Power architecture does not support 16B compare-and-swap. */ 677 for (;;) { 678 int64_t tm, op; 679 int64_t *ps; 680 681 rte_compiler_barrier(); 682 tm = rte_atomic64_read(cqe + 0); 683 op = rte_atomic64_read(cqe + 1); 684 rte_compiler_barrier(); 685 if (tm != rte_atomic64_read(cqe + 0)) 686 continue; 687 if (op != rte_atomic64_read(cqe + 1)) 688 continue; 689 ps = (int64_t *)ts; 690 ps[0] = tm; 691 ps[1] = op; 692 return; 693 } 694 #endif 695 } 696 697 /* Stores timestamp in the cache structure to share data with datapath. */ 698 static inline void 699 mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh, 700 uint64_t ts, uint64_t ci) 701 { 702 ci = ci << (64 - MLX5_CQ_INDEX_WIDTH); 703 ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH; 704 rte_compiler_barrier(); 705 rte_atomic64_set(&sh->txpp.ts.ts, ts); 706 rte_atomic64_set(&sh->txpp.ts.ci_ts, ci); 707 rte_wmb(); 708 } 709 710 /* Reads timestamp from Clock Queue CQE and stores in the cache. */ 711 static inline void 712 mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh) 713 { 714 struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue; 715 struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes; 716 union { 717 rte_int128_t u128; 718 struct mlx5_cqe_ts cts; 719 } to; 720 uint64_t ts; 721 uint16_t ci; 722 723 static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t), 724 "Wrong timestamp CQE part size"); 725 mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); 726 if (to.cts.op_own >> 4) { 727 DRV_LOG(DEBUG, "Clock Queue error sync lost."); 728 rte_atomic32_inc(&sh->txpp.err_clock_queue); 729 sh->txpp.sync_lost = 1; 730 return; 731 } 732 ci = rte_be_to_cpu_16(to.cts.wqe_counter); 733 ts = rte_be_to_cpu_64(to.cts.timestamp); 734 ts = mlx5_txpp_convert_rx_ts(sh, ts); 735 wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX; 736 wq->sq_ci = ci; 737 mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci); 738 } 739 740 /* Waits for the first completion on Clock Queue to init timestamp. */ 741 static inline void 742 mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh) 743 { 744 struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue; 745 uint32_t wait; 746 747 sh->txpp.ts_p = 0; 748 sh->txpp.ts_n = 0; 749 for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) { 750 struct timespec onems; 751 752 mlx5_txpp_update_timestamp(sh); 753 if (wq->sq_ci) 754 return; 755 /* Wait one millisecond and try again. */ 756 onems.tv_sec = 0; 757 onems.tv_nsec = NS_PER_S / MS_PER_S; 758 nanosleep(&onems, 0); 759 } 760 DRV_LOG(ERR, "Unable to initialize timestamp."); 761 sh->txpp.sync_lost = 1; 762 } 763 764 #ifdef HAVE_IBV_DEVX_EVENT 765 /* Gather statistics for timestamp from Clock Queue CQE. */ 766 static inline void 767 mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh) 768 { 769 /* Check whether we have a valid timestamp. */ 770 if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n) 771 return; 772 MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE); 773 sh->txpp.tsa[sh->txpp.ts_p] = sh->txpp.ts; 774 if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE) 775 sh->txpp.ts_p = 0; 776 if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE) 777 ++sh->txpp.ts_n; 778 } 779 780 /* Handles Rearm Queue completions in periodic service. */ 781 static __rte_always_inline void 782 mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh) 783 { 784 struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue; 785 uint32_t cq_ci = wq->cq_ci; 786 bool error = false; 787 int ret; 788 789 do { 790 volatile struct mlx5_cqe *cqe; 791 792 cqe = &wq->cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)]; 793 ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci); 794 switch (ret) { 795 case MLX5_CQE_STATUS_ERR: 796 error = true; 797 ++cq_ci; 798 break; 799 case MLX5_CQE_STATUS_SW_OWN: 800 wq->sq_ci += 2; 801 ++cq_ci; 802 break; 803 case MLX5_CQE_STATUS_HW_OWN: 804 break; 805 default: 806 MLX5_ASSERT(false); 807 break; 808 } 809 } while (ret != MLX5_CQE_STATUS_HW_OWN); 810 if (likely(cq_ci != wq->cq_ci)) { 811 /* Check whether we have missed interrupts. */ 812 if (cq_ci - wq->cq_ci != 1) { 813 DRV_LOG(DEBUG, "Rearm Queue missed interrupt."); 814 rte_atomic32_inc(&sh->txpp.err_miss_int); 815 /* Check sync lost on wqe index. */ 816 if (cq_ci - wq->cq_ci >= 817 (((1UL << MLX5_WQ_INDEX_WIDTH) / 818 MLX5_TXPP_REARM) - 1)) 819 error = 1; 820 } 821 /* Update doorbell record to notify hardware. */ 822 rte_compiler_barrier(); 823 *wq->cq_dbrec = rte_cpu_to_be_32(cq_ci); 824 rte_wmb(); 825 wq->cq_ci = cq_ci; 826 /* Fire new requests to Rearm Queue. */ 827 if (error) { 828 DRV_LOG(DEBUG, "Rearm Queue error sync lost."); 829 rte_atomic32_inc(&sh->txpp.err_rearm_queue); 830 sh->txpp.sync_lost = 1; 831 } 832 } 833 } 834 835 /* Handles Clock Queue completions in periodic service. */ 836 static __rte_always_inline void 837 mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh) 838 { 839 mlx5_txpp_update_timestamp(sh); 840 mlx5_txpp_gather_timestamp(sh); 841 } 842 #endif 843 844 /* Invoked periodically on Rearm Queue completions. */ 845 void 846 mlx5_txpp_interrupt_handler(void *cb_arg) 847 { 848 #ifndef HAVE_IBV_DEVX_EVENT 849 RTE_SET_USED(cb_arg); 850 return; 851 #else 852 struct mlx5_dev_ctx_shared *sh = cb_arg; 853 union { 854 struct mlx5dv_devx_async_event_hdr event_resp; 855 uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128]; 856 } out; 857 858 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 859 /* Process events in the loop. Only rearm completions are expected. */ 860 while (mlx5_glue->devx_get_event 861 (sh->txpp.echan, 862 &out.event_resp, 863 sizeof(out.buf)) >= 864 (ssize_t)sizeof(out.event_resp.cookie)) { 865 mlx5_txpp_handle_rearm_queue(sh); 866 mlx5_txpp_handle_clock_queue(sh); 867 mlx5_txpp_cq_arm(sh); 868 mlx5_txpp_doorbell_rearm_queue 869 (sh, sh->txpp.rearm_queue.sq_ci - 1); 870 } 871 #endif /* HAVE_IBV_DEVX_ASYNC */ 872 } 873 874 static void 875 mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh) 876 { 877 if (!sh->txpp.intr_handle.fd) 878 return; 879 mlx5_intr_callback_unregister(&sh->txpp.intr_handle, 880 mlx5_txpp_interrupt_handler, sh); 881 sh->txpp.intr_handle.fd = 0; 882 } 883 884 /* Attach interrupt handler and fires first request to Rearm Queue. */ 885 static int 886 mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh) 887 { 888 uint16_t event_nums[1] = {0}; 889 int ret; 890 int fd; 891 892 rte_atomic32_set(&sh->txpp.err_miss_int, 0); 893 rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); 894 rte_atomic32_set(&sh->txpp.err_clock_queue, 0); 895 rte_atomic32_set(&sh->txpp.err_ts_past, 0); 896 rte_atomic32_set(&sh->txpp.err_ts_future, 0); 897 /* Attach interrupt handler to process Rearm Queue completions. */ 898 fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan); 899 ret = mlx5_os_set_nonblock_channel_fd(fd); 900 if (ret) { 901 DRV_LOG(ERR, "Failed to change event channel FD."); 902 rte_errno = errno; 903 return -rte_errno; 904 } 905 memset(&sh->txpp.intr_handle, 0, sizeof(sh->txpp.intr_handle)); 906 fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan); 907 sh->txpp.intr_handle.fd = fd; 908 sh->txpp.intr_handle.type = RTE_INTR_HANDLE_EXT; 909 if (rte_intr_callback_register(&sh->txpp.intr_handle, 910 mlx5_txpp_interrupt_handler, sh)) { 911 sh->txpp.intr_handle.fd = 0; 912 DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno); 913 return -rte_errno; 914 } 915 /* Subscribe CQ event to the event channel controlled by the driver. */ 916 ret = mlx5_glue->devx_subscribe_devx_event(sh->txpp.echan, 917 sh->txpp.rearm_queue.cq->obj, 918 sizeof(event_nums), 919 event_nums, 0); 920 if (ret) { 921 DRV_LOG(ERR, "Failed to subscribe CQE event."); 922 rte_errno = errno; 923 return -errno; 924 } 925 /* Enable interrupts in the CQ. */ 926 mlx5_txpp_cq_arm(sh); 927 /* Fire the first request on Rearm Queue. */ 928 mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1); 929 mlx5_txpp_init_timestamp(sh); 930 return 0; 931 } 932 933 /* 934 * The routine initializes the packet pacing infrastructure: 935 * - allocates PP context 936 * - Clock CQ/SQ 937 * - Rearm CQ/SQ 938 * - attaches rearm interrupt handler 939 * - starts Clock Queue 940 * 941 * Returns 0 on success, negative otherwise 942 */ 943 static int 944 mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv) 945 { 946 int tx_pp = priv->config.tx_pp; 947 int ret; 948 949 /* Store the requested pacing parameters. */ 950 sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp; 951 sh->txpp.test = !!(tx_pp < 0); 952 sh->txpp.skew = priv->config.tx_skew; 953 sh->txpp.freq = priv->config.hca_attr.dev_freq_khz; 954 ret = mlx5_txpp_create_eqn(sh); 955 if (ret) 956 goto exit; 957 ret = mlx5_txpp_alloc_pp_index(sh); 958 if (ret) 959 goto exit; 960 ret = mlx5_txpp_create_clock_queue(sh); 961 if (ret) 962 goto exit; 963 ret = mlx5_txpp_create_rearm_queue(sh); 964 if (ret) 965 goto exit; 966 ret = mlx5_txpp_start_service(sh); 967 if (ret) 968 goto exit; 969 exit: 970 if (ret) { 971 mlx5_txpp_stop_service(sh); 972 mlx5_txpp_destroy_rearm_queue(sh); 973 mlx5_txpp_destroy_clock_queue(sh); 974 mlx5_txpp_free_pp_index(sh); 975 mlx5_txpp_destroy_eqn(sh); 976 sh->txpp.tick = 0; 977 sh->txpp.test = 0; 978 sh->txpp.skew = 0; 979 } 980 return ret; 981 } 982 983 /* 984 * The routine destroys the packet pacing infrastructure: 985 * - detaches rearm interrupt handler 986 * - Rearm CQ/SQ 987 * - Clock CQ/SQ 988 * - PP context 989 */ 990 static void 991 mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh) 992 { 993 mlx5_txpp_stop_service(sh); 994 mlx5_txpp_destroy_rearm_queue(sh); 995 mlx5_txpp_destroy_clock_queue(sh); 996 mlx5_txpp_free_pp_index(sh); 997 mlx5_txpp_destroy_eqn(sh); 998 sh->txpp.tick = 0; 999 sh->txpp.test = 0; 1000 sh->txpp.skew = 0; 1001 } 1002 1003 /** 1004 * Creates and starts packet pacing infrastructure on specified device. 1005 * 1006 * @param dev 1007 * Pointer to Ethernet device structure. 1008 * 1009 * @return 1010 * 0 on success, a negative errno value otherwise and rte_errno is set. 1011 */ 1012 int 1013 mlx5_txpp_start(struct rte_eth_dev *dev) 1014 { 1015 struct mlx5_priv *priv = dev->data->dev_private; 1016 struct mlx5_dev_ctx_shared *sh = priv->sh; 1017 int err = 0; 1018 int ret; 1019 1020 if (!priv->config.tx_pp) { 1021 /* Packet pacing is not requested for the device. */ 1022 MLX5_ASSERT(priv->txpp_en == 0); 1023 return 0; 1024 } 1025 if (priv->txpp_en) { 1026 /* Packet pacing is already enabled for the device. */ 1027 MLX5_ASSERT(sh->txpp.refcnt); 1028 return 0; 1029 } 1030 if (priv->config.tx_pp > 0) { 1031 ret = rte_mbuf_dynflag_lookup 1032 (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL); 1033 if (ret < 0) 1034 return 0; 1035 } 1036 ret = pthread_mutex_lock(&sh->txpp.mutex); 1037 MLX5_ASSERT(!ret); 1038 RTE_SET_USED(ret); 1039 if (sh->txpp.refcnt) { 1040 priv->txpp_en = 1; 1041 ++sh->txpp.refcnt; 1042 } else { 1043 err = mlx5_txpp_create(sh, priv); 1044 if (!err) { 1045 MLX5_ASSERT(sh->txpp.tick); 1046 priv->txpp_en = 1; 1047 sh->txpp.refcnt = 1; 1048 } else { 1049 rte_errno = -err; 1050 } 1051 } 1052 ret = pthread_mutex_unlock(&sh->txpp.mutex); 1053 MLX5_ASSERT(!ret); 1054 RTE_SET_USED(ret); 1055 return err; 1056 } 1057 1058 /** 1059 * Stops and destroys packet pacing infrastructure on specified device. 1060 * 1061 * @param dev 1062 * Pointer to Ethernet device structure. 1063 * 1064 * @return 1065 * 0 on success, a negative errno value otherwise and rte_errno is set. 1066 */ 1067 void 1068 mlx5_txpp_stop(struct rte_eth_dev *dev) 1069 { 1070 struct mlx5_priv *priv = dev->data->dev_private; 1071 struct mlx5_dev_ctx_shared *sh = priv->sh; 1072 int ret; 1073 1074 if (!priv->txpp_en) { 1075 /* Packet pacing is already disabled for the device. */ 1076 return; 1077 } 1078 priv->txpp_en = 0; 1079 ret = pthread_mutex_lock(&sh->txpp.mutex); 1080 MLX5_ASSERT(!ret); 1081 RTE_SET_USED(ret); 1082 MLX5_ASSERT(sh->txpp.refcnt); 1083 if (!sh->txpp.refcnt || --sh->txpp.refcnt) 1084 return; 1085 /* No references any more, do actual destroy. */ 1086 mlx5_txpp_destroy(sh); 1087 ret = pthread_mutex_unlock(&sh->txpp.mutex); 1088 MLX5_ASSERT(!ret); 1089 RTE_SET_USED(ret); 1090 } 1091 1092 /* 1093 * Read the current clock counter of an Ethernet device 1094 * 1095 * This returns the current raw clock value of an Ethernet device. It is 1096 * a raw amount of ticks, with no given time reference. 1097 * The value returned here is from the same clock than the one 1098 * filling timestamp field of Rx/Tx packets when using hardware timestamp 1099 * offload. Therefore it can be used to compute a precise conversion of 1100 * the device clock to the real time. 1101 * 1102 * @param dev 1103 * Pointer to Ethernet device structure. 1104 * @param clock 1105 * Pointer to the uint64_t that holds the raw clock value. 1106 * 1107 * @return 1108 * - 0: Success. 1109 * - -ENOTSUP: The function is not supported in this mode. Requires 1110 * packet pacing module configured and started (tx_pp devarg) 1111 */ 1112 int 1113 mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp) 1114 { 1115 struct mlx5_priv *priv = dev->data->dev_private; 1116 struct mlx5_dev_ctx_shared *sh = priv->sh; 1117 int ret; 1118 1119 if (sh->txpp.refcnt) { 1120 struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue; 1121 struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes; 1122 union { 1123 rte_int128_t u128; 1124 struct mlx5_cqe_ts cts; 1125 } to; 1126 uint64_t ts; 1127 1128 mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); 1129 if (to.cts.op_own >> 4) { 1130 DRV_LOG(DEBUG, "Clock Queue error sync lost."); 1131 rte_atomic32_inc(&sh->txpp.err_clock_queue); 1132 sh->txpp.sync_lost = 1; 1133 return -EIO; 1134 } 1135 ts = rte_be_to_cpu_64(to.cts.timestamp); 1136 ts = mlx5_txpp_convert_rx_ts(sh, ts); 1137 *timestamp = ts; 1138 return 0; 1139 } 1140 /* Not supported in isolated mode - kernel does not see the CQEs. */ 1141 if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY) 1142 return -ENOTSUP; 1143 ret = mlx5_read_clock(dev, timestamp); 1144 return ret; 1145 } 1146 1147 /** 1148 * DPDK callback to clear device extended statistics. 1149 * 1150 * @param dev 1151 * Pointer to Ethernet device structure. 1152 * 1153 * @return 1154 * 0 on success and stats is reset, negative errno value otherwise and 1155 * rte_errno is set. 1156 */ 1157 int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev) 1158 { 1159 struct mlx5_priv *priv = dev->data->dev_private; 1160 struct mlx5_dev_ctx_shared *sh = priv->sh; 1161 1162 rte_atomic32_set(&sh->txpp.err_miss_int, 0); 1163 rte_atomic32_set(&sh->txpp.err_rearm_queue, 0); 1164 rte_atomic32_set(&sh->txpp.err_clock_queue, 0); 1165 rte_atomic32_set(&sh->txpp.err_ts_past, 0); 1166 rte_atomic32_set(&sh->txpp.err_ts_future, 0); 1167 return 0; 1168 } 1169 1170 /** 1171 * Routine to retrieve names of extended device statistics 1172 * for packet send scheduling. It appends the specific stats names 1173 * after the parts filled by preceding modules (eth stats, etc.) 1174 * 1175 * @param dev 1176 * Pointer to Ethernet device structure. 1177 * @param[out] xstats_names 1178 * Buffer to insert names into. 1179 * @param n 1180 * Number of names. 1181 * @param n_used 1182 * Number of names filled by preceding statistics modules. 1183 * 1184 * @return 1185 * Number of xstats names. 1186 */ 1187 int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, 1188 struct rte_eth_xstat_name *xstats_names, 1189 unsigned int n, unsigned int n_used) 1190 { 1191 unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names); 1192 unsigned int i; 1193 1194 if (n >= n_used + n_txpp && xstats_names) { 1195 for (i = 0; i < n_txpp; ++i) { 1196 strncpy(xstats_names[i + n_used].name, 1197 mlx5_txpp_stat_names[i], 1198 RTE_ETH_XSTATS_NAME_SIZE); 1199 xstats_names[i + n_used].name 1200 [RTE_ETH_XSTATS_NAME_SIZE - 1] = 0; 1201 } 1202 } 1203 return n_used + n_txpp; 1204 } 1205 1206 static inline void 1207 mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp, 1208 struct mlx5_txpp_ts *tsa, uint16_t idx) 1209 { 1210 do { 1211 int64_t ts, ci; 1212 1213 ts = rte_atomic64_read(&txpp->tsa[idx].ts); 1214 ci = rte_atomic64_read(&txpp->tsa[idx].ci_ts); 1215 rte_compiler_barrier(); 1216 if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) 1217 continue; 1218 if (rte_atomic64_read(&txpp->tsa[idx].ts) != ts) 1219 continue; 1220 if (rte_atomic64_read(&txpp->tsa[idx].ci_ts) != ci) 1221 continue; 1222 rte_atomic64_set(&tsa->ts, ts); 1223 rte_atomic64_set(&tsa->ci_ts, ci); 1224 return; 1225 } while (true); 1226 } 1227 1228 /* 1229 * Jitter reflects the clock change between 1230 * neighbours Clock Queue completions. 1231 */ 1232 static uint64_t 1233 mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp) 1234 { 1235 struct mlx5_txpp_ts tsa0, tsa1; 1236 int64_t dts, dci; 1237 uint16_t ts_p; 1238 1239 if (txpp->ts_n < 2) { 1240 /* No gathered enough reports yet. */ 1241 return 0; 1242 } 1243 do { 1244 int ts_0, ts_1; 1245 1246 ts_p = txpp->ts_p; 1247 rte_compiler_barrier(); 1248 ts_0 = ts_p - 2; 1249 if (ts_0 < 0) 1250 ts_0 += MLX5_TXPP_REARM_SQ_SIZE; 1251 ts_1 = ts_p - 1; 1252 if (ts_1 < 0) 1253 ts_1 += MLX5_TXPP_REARM_SQ_SIZE; 1254 mlx5_txpp_read_tsa(txpp, &tsa0, ts_0); 1255 mlx5_txpp_read_tsa(txpp, &tsa1, ts_1); 1256 rte_compiler_barrier(); 1257 } while (ts_p != txpp->ts_p); 1258 /* We have two neighbor reports, calculate the jitter. */ 1259 dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); 1260 dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - 1261 (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); 1262 if (dci < 0) 1263 dci += 1 << MLX5_CQ_INDEX_WIDTH; 1264 dci *= txpp->tick; 1265 return (dts > dci) ? dts - dci : dci - dts; 1266 } 1267 1268 /* 1269 * Wander reflects the long-term clock change 1270 * over the entire length of all Clock Queue completions. 1271 */ 1272 static uint64_t 1273 mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp) 1274 { 1275 struct mlx5_txpp_ts tsa0, tsa1; 1276 int64_t dts, dci; 1277 uint16_t ts_p; 1278 1279 if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) { 1280 /* No gathered enough reports yet. */ 1281 return 0; 1282 } 1283 do { 1284 int ts_0, ts_1; 1285 1286 ts_p = txpp->ts_p; 1287 rte_compiler_barrier(); 1288 ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1; 1289 if (ts_0 < 0) 1290 ts_0 += MLX5_TXPP_REARM_SQ_SIZE; 1291 ts_1 = ts_p - 1; 1292 if (ts_1 < 0) 1293 ts_1 += MLX5_TXPP_REARM_SQ_SIZE; 1294 mlx5_txpp_read_tsa(txpp, &tsa0, ts_0); 1295 mlx5_txpp_read_tsa(txpp, &tsa1, ts_1); 1296 rte_compiler_barrier(); 1297 } while (ts_p != txpp->ts_p); 1298 /* We have two neighbor reports, calculate the jitter. */ 1299 dts = rte_atomic64_read(&tsa1.ts) - rte_atomic64_read(&tsa0.ts); 1300 dci = (rte_atomic64_read(&tsa1.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)) - 1301 (rte_atomic64_read(&tsa0.ci_ts) >> (64 - MLX5_CQ_INDEX_WIDTH)); 1302 dci += 1 << MLX5_CQ_INDEX_WIDTH; 1303 dci *= txpp->tick; 1304 return (dts > dci) ? dts - dci : dci - dts; 1305 } 1306 1307 /** 1308 * Routine to retrieve extended device statistics 1309 * for packet send scheduling. It appends the specific statistics 1310 * after the parts filled by preceding modules (eth stats, etc.) 1311 * 1312 * @param dev 1313 * Pointer to Ethernet device. 1314 * @param[out] stats 1315 * Pointer to rte extended stats table. 1316 * @param n 1317 * The size of the stats table. 1318 * @param n_used 1319 * Number of stats filled by preceding statistics modules. 1320 * 1321 * @return 1322 * Number of extended stats on success and stats is filled, 1323 * negative on error and rte_errno is set. 1324 */ 1325 int 1326 mlx5_txpp_xstats_get(struct rte_eth_dev *dev, 1327 struct rte_eth_xstat *stats, 1328 unsigned int n, unsigned int n_used) 1329 { 1330 unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names); 1331 1332 if (n >= n_used + n_txpp && stats) { 1333 struct mlx5_priv *priv = dev->data->dev_private; 1334 struct mlx5_dev_ctx_shared *sh = priv->sh; 1335 unsigned int i; 1336 1337 for (i = 0; i < n_txpp; ++i) 1338 stats[n_used + i].id = n_used + i; 1339 stats[n_used + 0].value = 1340 rte_atomic32_read(&sh->txpp.err_miss_int); 1341 stats[n_used + 1].value = 1342 rte_atomic32_read(&sh->txpp.err_rearm_queue); 1343 stats[n_used + 2].value = 1344 rte_atomic32_read(&sh->txpp.err_clock_queue); 1345 stats[n_used + 3].value = 1346 rte_atomic32_read(&sh->txpp.err_ts_past); 1347 stats[n_used + 4].value = 1348 rte_atomic32_read(&sh->txpp.err_ts_future); 1349 stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp); 1350 stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp); 1351 stats[n_used + 7].value = sh->txpp.sync_lost; 1352 } 1353 return n_used + n_txpp; 1354 } 1355