/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <fcntl.h>
#include <stdint.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_common_os.h"

static const char * const mlx5_txpp_stat_names[] = {
	"txpp_err_miss_int", /* Missed service interrupt. */
	"txpp_err_rearm_queue", /* Rearm Queue errors. */
	"txpp_err_clock_queue", /* Clock Queue errors. */
	"txpp_err_ts_past", /* Timestamp in the past. */
	"txpp_err_ts_future", /* Timestamp in the distant future. */
	"txpp_jitter", /* Timestamp jitter (one Clock Queue completion). */
	"txpp_wander", /* Timestamp wander (half of Clock Queue completions). */
	"txpp_sync_lost", /* Scheduling synchronization lost. */
};

/* Destroy Event Queue Notification Channel. */
static void
mlx5_txpp_destroy_event_channel(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.echan) {
		mlx5_glue->devx_destroy_event_channel(sh->txpp.echan);
		sh->txpp.echan = NULL;
	}
}

/* Create Event Queue Notification Channel. */
static int
mlx5_txpp_create_event_channel(struct mlx5_dev_ctx_shared *sh)
{
	MLX5_ASSERT(!sh->txpp.echan);
	sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx,
			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!sh->txpp.echan) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create event channel %d.", rte_errno);
		return -rte_errno;
	}
	return 0;
}

static void
mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.pp) {
		mlx5_glue->dv_free_pp(sh->txpp.pp);
		sh->txpp.pp = NULL;
		sh->txpp.pp_id = 0;
	}
}

/* Allocate Packet Pacing index from kernel via mlx5dv call. */
static int
mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh)
{
#ifdef HAVE_MLX5DV_PP_ALLOC
	uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)];
	uint64_t rate;

	MLX5_ASSERT(!sh->txpp.pp);
	memset(&pp, 0, sizeof(pp));
	rate = NS_PER_S / sh->txpp.tick;
	if (rate * sh->txpp.tick != NS_PER_S)
		DRV_LOG(WARNING, "Packet pacing frequency is not precise.");
	if (sh->txpp.test) {
		uint32_t len;

		len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE,
			      (size_t)RTE_ETHER_MIN_LEN);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 burst_upper_bound, len);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 typical_packet_size, len);
		/* Convert packets per second into kilobits. */
		rate = (rate * len) / (1000ul / CHAR_BIT);
		DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate);
	}
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate);
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
		 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE);
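	/*
	 * The rate mode selects how the limit is interpreted: per WQE
	 * (MLX5_WQE_RATE) in the normal mode, or as a data rate in kilobits
	 * (MLX5_DATA_RATE) in the test mode, matching the conversion above.
	 */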
	sh->txpp.pp = mlx5_glue->dv_alloc_pp
				(sh->ctx, sizeof(pp), &pp,
				 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX);
	if (sh->txpp.pp == NULL) {
		DRV_LOG(ERR, "Failed to allocate packet pacing index.");
		rte_errno = errno;
		return -errno;
	}
	if (!((struct mlx5dv_pp *)sh->txpp.pp)->index) {
		DRV_LOG(ERR, "Zero packet pacing index allocated.");
		mlx5_txpp_free_pp_index(sh);
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	sh->txpp.pp_id = ((struct mlx5dv_pp *)(sh->txpp.pp))->index;
	return 0;
#else
	RTE_SET_USED(sh);
	DRV_LOG(ERR, "Allocating pacing index is not supported.");
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}

static void
mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq)
{
	if (wq->sq)
		claim_zero(mlx5_devx_cmd_destroy(wq->sq));
	if (wq->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem));
	if (wq->sq_buf)
		mlx5_free((void *)(uintptr_t)wq->sq_buf);
	if (wq->cq)
		claim_zero(mlx5_devx_cmd_destroy(wq->cq));
	if (wq->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem));
	if (wq->cq_buf)
		mlx5_free((void *)(uintptr_t)wq->cq_buf);
	memset(wq, 0, sizeof(*wq));
}

static void
mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;

	mlx5_txpp_destroy_send_queue(wq);
}

static void
mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;

	mlx5_txpp_destroy_send_queue(wq);
	if (sh->txpp.tsa) {
		mlx5_free(sh->txpp.tsa);
		sh->txpp.tsa = NULL;
	}
}

static void
mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	union {
		uint32_t w32[2];
		uint64_t w64;
	} cs;
	void *reg_addr;

	wq->sq_ci = ci + 1;
	cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
			(wq->wqes[ci & (wq->sq_size - 1)].ctrl[0]) |
			(ci - 1) << 8);
	cs.w32[1] = wq->wqes[ci & (wq->sq_size - 1)].ctrl[1];
	/* Update SQ doorbell record with new SQ ci. */
	rte_compiler_barrier();
	*wq->sq_dbrec = rte_cpu_to_be_32(wq->sq_ci);
	/* Make sure the doorbell record is updated. */
	rte_wmb();
	/* Write to doorbell register to start processing. */
	reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar);
	__mlx5_uar_write64_relaxed(cs.w64, reg_addr, NULL);
	rte_wmb();
}

static void
mlx5_txpp_fill_cqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	uint32_t i;

	for (i = 0; i < MLX5_TXPP_REARM_CQ_SIZE; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
}

static void
mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	uint32_t i;

	for (i = 0; i < wq->sq_size; i += 2) {
		struct mlx5_wqe_cseg *cs;
		struct mlx5_wqe_qseg *qs;
		uint32_t index;

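		/*
		 * Each iteration prepares a WQE pair: a SEND_EN enabling the
		 * next portion of Clock Queue (slave) WQEs, followed by a
		 * WAIT blocking until the Clock Queue CQE counter reaches
		 * the matching index, so the Rearm Queue advances in step
		 * with the Clock Queue.
		 */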
		/* Build SEND_EN request with slave WQE index. */
		cs = &wqe[i + 0].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ALWAYS <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) &
			((1 << MLX5_WQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.sq->id);
		/* Build WAIT request with slave CQE index. */
		cs = &wqe[i + 1].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) &
			((1 << MLX5_CQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.cq->id);
	}
}

/* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */
static int
mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_REARM_CQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create completion queue object for Rearm Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_REARM_CQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Rearm Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	wq->arm_sn = 0;
	/* Mark all CQEs initially as invalid. */
	mlx5_txpp_fill_cqe_rearm_queue(sh);
	/*
	 * Allocate memory buffer for Send Queue WQEs.
	 * There should be no WQE leftovers in the cyclic queue.
	 */
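	/*
	 * As with the CQ above, the doorbell record shares the registered
	 * umem with the ring itself and is placed right after the WQE
	 * array at an MLX5_DBR_SIZE aligned offset.
	 */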
	wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE;
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create send queue object for Rearm Queue. */
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.cqn = wq->cq->id;
	sq_attr.cd_master = 1;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Rearm Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before goto Ready state. */
	mlx5_txpp_fill_wqe_rearm_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Rearm Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_rearm_queue(sh);
	rte_errno = -ret;
	return ret;
}

static void
mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	struct mlx5_wqe_cseg *cs = &wqe->cseg;
	uint32_t wqe_size, opcode, i;
	uint8_t *dst;

	/* For test purposes fill the WQ with SEND inline packet. */
	if (sh->txpp.test) {
		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
				     MLX5_WQE_CSEG_SIZE +
				     2 * MLX5_WQE_ESEG_SIZE -
				     MLX5_ESEG_MIN_INLINE_SIZE,
				     MLX5_WSEG_SIZE);
		opcode = MLX5_OPCODE_SEND;
	} else {
		wqe_size = MLX5_WSEG_SIZE;
		opcode = MLX5_OPCODE_NOP;
	}
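	/*
	 * In the normal mode the Clock Queue carries NOP WQEs (nothing goes
	 * to the wire, only completions are generated at the pacing rate);
	 * in the test mode each WQE is a real SEND with the inline packet
	 * built below.
	 */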
	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
	cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) |
				     (wqe_size / MLX5_WSEG_SIZE));
	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE);
	if (sh->txpp.test) {
		struct mlx5_wqe_eseg *es = &wqe->eseg;
		struct rte_ether_hdr *eth_hdr;
		struct rte_ipv4_hdr *ip_hdr;
		struct rte_udp_hdr *udp_hdr;

		/* Build the inline test packet pattern. */
		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
			    (sizeof(struct rte_ether_hdr) +
			     sizeof(struct rte_ipv4_hdr)));
		es->flags = 0;
		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		es->swp_offs = 0;
		es->metadata = 0;
		es->swp_flags = 0;
		es->mss = 0;
		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
		/* Build test packet L2 header (Ethernet). */
		dst = (uint8_t *)&es->inline_data;
		eth_hdr = (struct rte_ether_hdr *)dst;
		rte_eth_random_addr(&eth_hdr->d_addr.addr_bytes[0]);
		rte_eth_random_addr(&eth_hdr->s_addr.addr_bytes[0]);
		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		/* Build test packet L3 header (IP v4). */
		dst += sizeof(struct rte_ether_hdr);
		ip_hdr = (struct rte_ipv4_hdr *)dst;
		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
		ip_hdr->type_of_service = 0;
		ip_hdr->fragment_offset = 0;
		ip_hdr->time_to_live = 64;
		ip_hdr->next_proto_id = IPPROTO_UDP;
		ip_hdr->packet_id = 0;
		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
						sizeof(struct rte_ether_hdr));
		/* Use RFC5735 / RFC2544 reserved network test addresses. */
		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 1);
		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 2);
		if (MLX5_TXPP_TEST_PKT_SIZE <
		    (sizeof(struct rte_ether_hdr) +
		     sizeof(struct rte_ipv4_hdr) +
		     sizeof(struct rte_udp_hdr)))
			goto wcopy;
		/* Build test packet L4 header (UDP). */
		dst += sizeof(struct rte_ipv4_hdr);
		udp_hdr = (struct rte_udp_hdr *)dst;
		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
		udp_hdr->dst_port = RTE_BE16(9);
		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
					      sizeof(struct rte_ether_hdr) -
					      sizeof(struct rte_ipv4_hdr));
		udp_hdr->dgram_cksum = 0;
		/* Fill the test packet data. */
		dst += sizeof(struct rte_udp_hdr);
		for (i = sizeof(struct rte_ether_hdr) +
			 sizeof(struct rte_ipv4_hdr) +
			 sizeof(struct rte_udp_hdr);
		     i < MLX5_TXPP_TEST_PKT_SIZE; i++)
			*dst++ = (uint8_t)(i & 0xFF);
	}
wcopy:
	/* Duplicate the pattern to the next WQEs. */
	dst = (uint8_t *)(uintptr_t)wq->sq_buf;
	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
		dst += wqe_size;
		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size);
	}
}

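/*
 * The Clock Queue is the time reference of the packet pacing machinery:
 * its single CQE is continuously updated by hardware at the configured
 * tick rate and is sampled by mlx5_txpp_update_timestamp() and
 * mlx5_txpp_read_clock() below.
 */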
/* Creates the Clock Queue for packet pacing, returns zero on success. */
static int
mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				   MLX5_TXPP_REARM_SQ_SIZE *
				   sizeof(struct mlx5_txpp_ts),
				   0, sh->numa_node);
	if (!sh->txpp.tsa) {
		DRV_LOG(ERR, "Failed to allocate memory for CQ stats.");
		return -ENOMEM;
	}
	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create completion queue object for Clock Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.use_first_only = 1;
	cq_attr.overrun_ignore = 1;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	/* Allocate memory buffer for Send Queue WQEs. */
	if (sh->txpp.test) {
		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
					MLX5_WQE_CSEG_SIZE +
					2 * MLX5_WQE_ESEG_SIZE -
					MLX5_ESEG_MIN_INLINE_SIZE,
					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
	} else {
		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
	}
	/* There should not be WQE leftovers in the cyclic queue. */
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
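	/*
	 * Presumed semantics of the attributes below: non_wire (set in the
	 * normal mode) requests completions without actual transmission,
	 * while static_sq_wq marks the prebuilt WQE ring as fixed; in the
	 * test mode the queue is bound to a TIS so the inline test packets
	 * are really sent to the wire.
	 */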
	/* Create send queue object for Clock Queue. */
	if (sh->txpp.test) {
		sq_attr.tis_lst_sz = 1;
		sq_attr.tis_num = sh->tis->id;
		sq_attr.non_wire = 0;
		sq_attr.static_sq_wq = 1;
	} else {
		sq_attr.non_wire = 1;
		sq_attr.static_sq_wq = 1;
	}
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = wq->cq->id;
	sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
	sq_attr.wq_attr.cd_slave = 1;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	/* umem_offset must be zero for static_sq_wq queue. */
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before goto Ready state. */
	mlx5_txpp_fill_wqe_clock_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	wq->sq_ci = 0;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Clock Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_clock_queue(sh);
	rte_errno = -ret;
	return ret;
}

/* Enable notification from the Rearm Queue CQ. */
static inline void
mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
{
	void *base_addr;

	struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
	uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
	uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
	uint64_t db_be = rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq->id);
	base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
	uint32_t *addr = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL);

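	/*
	 * Publish the arm request: first update the CQ arm doorbell record,
	 * then ring the UAR doorbell with the sequence number, command and
	 * consumer index packed into the high word and the CQ id in the
	 * low word.
	 */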
	rte_compiler_barrier();
	aq->cq_dbrec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi);
	rte_wmb();
#ifdef RTE_ARCH_64
	*(uint64_t *)addr = db_be;
#else
	*(uint32_t *)addr = db_be;
	rte_io_wmb();
	*((uint32_t *)addr + 1) = db_be >> 32;
#endif
	aq->arm_sn++;
}

#if defined(RTE_ARCH_X86_64)
static inline int
mlx5_atomic128_compare_exchange(rte_int128_t *dst,
				rte_int128_t *exp,
				const rte_int128_t *src)
{
	uint8_t res;

	asm volatile (MPLOCKED
		      "cmpxchg16b %[dst];"
		      " sete %[res]"
		      : [dst] "=m" (dst->val[0]),
			"=a" (exp->val[0]),
			"=d" (exp->val[1]),
			[res] "=r" (res)
		      : "b" (src->val[0]),
			"c" (src->val[1]),
			"a" (exp->val[0]),
			"d" (exp->val[1]),
			"m" (dst->val[0])
		      : "memory");

	return res;
}
#endif

static inline void
mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts)
{
	/*
	 * The only CQE of the Clock Queue is continuously updated by
	 * hardware at the specified rate. We have to read the timestamp
	 * and WQE completion index atomically.
	 */
#if defined(RTE_ARCH_X86_64)
	rte_int128_t src;

	memset(&src, 0, sizeof(src));
	*ts = src;
	/* if (*from == *ts) *from = *src else *ts = *from; */
	mlx5_atomic128_compare_exchange(from, ts, &src);
#else
	uint64_t *cqe = (uint64_t *)from;

	/*
	 * Power architecture does not support 16B compare-and-swap.
	 * ARM implements it in software, the code below is more relevant.
	 */
	for (;;) {
		uint64_t tm, op;
		uint64_t *ps;

		rte_compiler_barrier();
		tm = __atomic_load_n(cqe + 0, __ATOMIC_RELAXED);
		op = __atomic_load_n(cqe + 1, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (tm != __atomic_load_n(cqe + 0, __ATOMIC_RELAXED))
			continue;
		if (op != __atomic_load_n(cqe + 1, __ATOMIC_RELAXED))
			continue;
		ps = (uint64_t *)ts;
		ps[0] = tm;
		ps[1] = op;
		return;
	}
#endif
}

/* Stores timestamp in the cache structure to share data with datapath. */
static inline void
mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh,
			  uint64_t ts, uint64_t ci)
{
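	/*
	 * Pack the completion index into the upper MLX5_CQ_INDEX_WIDTH bits
	 * of ci_ts and keep the truncated timestamp in the lower bits, so a
	 * reader can cross-check ci_ts against ts and detect a torn update
	 * (see mlx5_txpp_read_tsa()).
	 */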
	ci = ci << (64 - MLX5_CQ_INDEX_WIDTH);
	ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH;
	rte_compiler_barrier();
	__atomic_store_n(&sh->txpp.ts.ts, ts, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.ts.ci_ts, ci, __ATOMIC_RELAXED);
	rte_wmb();
}

/* Reads timestamp from Clock Queue CQE and stores in the cache. */
static inline void
mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	union {
		rte_int128_t u128;
		struct mlx5_cqe_ts cts;
	} to;
	uint64_t ts;
	uint16_t ci;

	static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t),
		      "Wrong timestamp CQE part size");
	mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
	if (to.cts.op_own >> 4) {
		DRV_LOG(DEBUG, "Clock Queue error sync lost.");
		__atomic_fetch_add(&sh->txpp.err_clock_queue,
				   1, __ATOMIC_RELAXED);
		sh->txpp.sync_lost = 1;
		return;
	}
	ci = rte_be_to_cpu_16(to.cts.wqe_counter);
	ts = rte_be_to_cpu_64(to.cts.timestamp);
	ts = mlx5_txpp_convert_rx_ts(sh, ts);
	wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX;
	wq->sq_ci = ci;
	mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci);
}

/* Waits for the first completion on Clock Queue to init timestamp. */
static inline void
mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	uint32_t wait;

	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) {
		struct timespec onems;

		mlx5_txpp_update_timestamp(sh);
		if (wq->sq_ci)
			return;
		/* Wait one millisecond and try again. */
		onems.tv_sec = 0;
		onems.tv_nsec = NS_PER_S / MS_PER_S;
		nanosleep(&onems, 0);
	}
	DRV_LOG(ERR, "Unable to initialize timestamp.");
	sh->txpp.sync_lost = 1;
}

#ifdef HAVE_IBV_DEVX_EVENT
/* Gather statistics for timestamp from Clock Queue CQE. */
static inline void
mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	/* Check whether we have a valid timestamp. */
	if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n)
		return;
	MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE);
	__atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ts,
			 sh->txpp.ts.ts, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts,
			 sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
	if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE)
		sh->txpp.ts_p = 0;
	if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE)
		++sh->txpp.ts_n;
}

/* Handles Rearm Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	uint32_t cq_ci = wq->cq_ci;
	bool error = false;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &wq->cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)];
		ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci);
		switch (ret) {
		case MLX5_CQE_STATUS_ERR:
			error = true;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_SW_OWN:
			wq->sq_ci += 2;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_HW_OWN:
			break;
		default:
			MLX5_ASSERT(false);
			break;
		}
	} while (ret != MLX5_CQE_STATUS_HW_OWN);
	if (likely(cq_ci != wq->cq_ci)) {
		/* Check whether we have missed interrupts. */
		if (cq_ci - wq->cq_ci != 1) {
			DRV_LOG(DEBUG, "Rearm Queue missed interrupt.");
			__atomic_fetch_add(&sh->txpp.err_miss_int,
					   1, __ATOMIC_RELAXED);
			/* Check sync lost on wqe index. */
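			/*
			 * If more rearm events were missed than the WQE
			 * index space divided by MLX5_TXPP_REARM can hold,
			 * the indices may have wrapped and the schedule can
			 * no longer be trusted.
			 */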
			if (cq_ci - wq->cq_ci >=
			    (((1UL << MLX5_WQ_INDEX_WIDTH) /
			      MLX5_TXPP_REARM) - 1))
				error = true;
		}
		/* Update doorbell record to notify hardware. */
		rte_compiler_barrier();
		*wq->cq_dbrec = rte_cpu_to_be_32(cq_ci);
		rte_wmb();
		wq->cq_ci = cq_ci;
		/* Fire new requests to Rearm Queue. */
		if (error) {
			DRV_LOG(DEBUG, "Rearm Queue error sync lost.");
			__atomic_fetch_add(&sh->txpp.err_rearm_queue,
					   1, __ATOMIC_RELAXED);
			sh->txpp.sync_lost = 1;
		}
	}
}

/* Handles Clock Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_update_timestamp(sh);
	mlx5_txpp_gather_timestamp(sh);
}
#endif

/* Invoked periodically on Rearm Queue completions. */
void
mlx5_txpp_interrupt_handler(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_EVENT
	RTE_SET_USED(cb_arg);
	return;
#else
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Process events in the loop. Only rearm completions are expected. */
	while (mlx5_glue->devx_get_event
				(sh->txpp.echan,
				 &out.event_resp,
				 sizeof(out.buf)) >=
				 (ssize_t)sizeof(out.event_resp.cookie)) {
		mlx5_txpp_handle_rearm_queue(sh);
		mlx5_txpp_handle_clock_queue(sh);
		mlx5_txpp_cq_arm(sh);
		mlx5_txpp_doorbell_rearm_queue
					(sh, sh->txpp.rearm_queue.sq_ci - 1);
	}
#endif /* HAVE_IBV_DEVX_EVENT */
}

static void
mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->txpp.intr_handle.fd)
		return;
	mlx5_intr_callback_unregister(&sh->txpp.intr_handle,
				      mlx5_txpp_interrupt_handler, sh);
	sh->txpp.intr_handle.fd = 0;
}

/* Attach interrupt handler and fire the first request to Rearm Queue. */
static int
mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh)
{
	uint16_t event_nums[1] = {0};
	int ret;
	int fd;

	sh->txpp.err_miss_int = 0;
	sh->txpp.err_rearm_queue = 0;
	sh->txpp.err_clock_queue = 0;
	sh->txpp.err_ts_past = 0;
	sh->txpp.err_ts_future = 0;
	/* Attach interrupt handler to process Rearm Queue completions. */
	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
	ret = mlx5_os_set_nonblock_channel_fd(fd);
	if (ret) {
		DRV_LOG(ERR, "Failed to change event channel FD.");
		rte_errno = errno;
		return -rte_errno;
	}
	memset(&sh->txpp.intr_handle, 0, sizeof(sh->txpp.intr_handle));
	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
	sh->txpp.intr_handle.fd = fd;
	sh->txpp.intr_handle.type = RTE_INTR_HANDLE_EXT;
	if (rte_intr_callback_register(&sh->txpp.intr_handle,
				       mlx5_txpp_interrupt_handler, sh)) {
		sh->txpp.intr_handle.fd = 0;
		DRV_LOG(ERR, "Failed to register CQE interrupt %d.",
			rte_errno);
		return -rte_errno;
	}
	/* Subscribe CQ event to the event channel controlled by the driver. */
	ret = mlx5_glue->devx_subscribe_devx_event(sh->txpp.echan,
						   sh->txpp.rearm_queue.cq->obj,
						   sizeof(event_nums),
						   event_nums, 0);
	if (ret) {
		DRV_LOG(ERR, "Failed to subscribe CQE event.");
		rte_errno = errno;
		return -errno;
	}
	/* Enable interrupts in the CQ. */
	mlx5_txpp_cq_arm(sh);
	/* Fire the first request on Rearm Queue. */
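	/*
	 * From now on every Rearm Queue completion handled in the interrupt
	 * handler re-arms the CQ and rings the next SEND_EN/WAIT pair, so
	 * the Clock Queue stays continuously enabled.
	 */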
	mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1);
	mlx5_txpp_init_timestamp(sh);
	return 0;
}

/*
 * The routine initializes the packet pacing infrastructure:
 * - allocates PP context
 * - Clock CQ/SQ
 * - Rearm CQ/SQ
 * - attaches rearm interrupt handler
 * - starts Clock Queue
 *
 * Returns 0 on success, negative otherwise
 */
static int
mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv)
{
	int tx_pp = priv->config.tx_pp;
	int ret;

	/* Store the requested pacing parameters. */
	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
	sh->txpp.test = !!(tx_pp < 0);
	sh->txpp.skew = priv->config.tx_skew;
	sh->txpp.freq = priv->config.hca_attr.dev_freq_khz;
	ret = mlx5_txpp_create_event_channel(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_alloc_pp_index(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_clock_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_rearm_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_start_service(sh);
	if (ret)
		goto exit;
exit:
	if (ret) {
		mlx5_txpp_stop_service(sh);
		mlx5_txpp_destroy_rearm_queue(sh);
		mlx5_txpp_destroy_clock_queue(sh);
		mlx5_txpp_free_pp_index(sh);
		mlx5_txpp_destroy_event_channel(sh);
		sh->txpp.tick = 0;
		sh->txpp.test = 0;
		sh->txpp.skew = 0;
	}
	return ret;
}

/*
 * The routine destroys the packet pacing infrastructure:
 * - detaches rearm interrupt handler
 * - Rearm CQ/SQ
 * - Clock CQ/SQ
 * - PP context
 */
static void
mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_stop_service(sh);
	mlx5_txpp_destroy_rearm_queue(sh);
	mlx5_txpp_destroy_clock_queue(sh);
	mlx5_txpp_free_pp_index(sh);
	mlx5_txpp_destroy_event_channel(sh);
	sh->txpp.tick = 0;
	sh->txpp.test = 0;
	sh->txpp.skew = 0;
}

/**
 * Creates and starts packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_txpp_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int err = 0;
	int ret;

	if (!priv->config.tx_pp) {
		/* Packet pacing is not requested for the device. */
		MLX5_ASSERT(priv->txpp_en == 0);
		return 0;
	}
	if (priv->txpp_en) {
		/* Packet pacing is already enabled for the device. */
		MLX5_ASSERT(sh->txpp.refcnt);
		return 0;
	}
	if (priv->config.tx_pp > 0) {
		ret = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
		if (ret < 0)
			return 0;
	}
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	if (sh->txpp.refcnt) {
		priv->txpp_en = 1;
		++sh->txpp.refcnt;
	} else {
		err = mlx5_txpp_create(sh, priv);
		if (!err) {
			MLX5_ASSERT(sh->txpp.tick);
			priv->txpp_en = 1;
			sh->txpp.refcnt = 1;
		} else {
			rte_errno = -err;
		}
	}
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	return err;
}

/**
 * Stops and destroys packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_txpp_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (!priv->txpp_en) {
		/* Packet pacing is already disabled for the device. */
		return;
	}
	priv->txpp_en = 0;
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	MLX5_ASSERT(sh->txpp.refcnt);
	if (!sh->txpp.refcnt || --sh->txpp.refcnt) {
		ret = pthread_mutex_unlock(&sh->txpp.mutex);
		MLX5_ASSERT(!ret);
		RTE_SET_USED(ret);
		return;
	}
	/* No references any more, do actual destroy. */
	mlx5_txpp_destroy(sh);
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
}

/*
 * Read the current clock counter of an Ethernet device.
 *
 * This returns the current raw clock value of an Ethernet device. It is
 * a raw amount of ticks, with no given time reference.
 * The value returned here is from the same clock as the one
 * filling the timestamp field of Rx/Tx packets when using hardware timestamp
 * offload. Therefore it can be used to compute a precise conversion of
 * the device clock to the real time.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param timestamp
 *   Pointer to the uint64_t that holds the raw clock value.
 *
 * @return
 *   - 0: Success.
 *   - -ENOTSUP: The function is not supported in this mode. Requires
 *     packet pacing module configured and started (tx_pp devarg)
 */
int
mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (sh->txpp.refcnt) {
		struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
		struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
		union {
			rte_int128_t u128;
			struct mlx5_cqe_ts cts;
		} to;
		uint64_t ts;

		mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp,
				     &to.u128);
		if (to.cts.op_own >> 4) {
			DRV_LOG(DEBUG, "Clock Queue error sync lost.");
			__atomic_fetch_add(&sh->txpp.err_clock_queue,
					   1, __ATOMIC_RELAXED);
			sh->txpp.sync_lost = 1;
			return -EIO;
		}
		ts = rte_be_to_cpu_64(to.cts.timestamp);
		ts = mlx5_txpp_convert_rx_ts(sh, ts);
		*timestamp = ts;
		return 0;
	}
	/* Not supported in isolated mode - kernel does not see the CQEs. */
	if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -ENOTSUP;
	ret = mlx5_read_clock(dev, timestamp);
	return ret;
}

/**
 * DPDK callback to clear device extended statistics.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success and stats is reset, negative errno value otherwise and
 *   rte_errno is set.
 */
int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

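	/*
	 * Only the error counters are cleared here; jitter and wander are
	 * recomputed from the timestamp history on every query and the
	 * sync_lost flag is reported as-is by mlx5_txpp_xstats_get().
	 */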
1179 */ 1180 int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev) 1181 { 1182 struct mlx5_priv *priv = dev->data->dev_private; 1183 struct mlx5_dev_ctx_shared *sh = priv->sh; 1184 1185 __atomic_store_n(&sh->txpp.err_miss_int, 0, __ATOMIC_RELAXED); 1186 __atomic_store_n(&sh->txpp.err_rearm_queue, 0, __ATOMIC_RELAXED); 1187 __atomic_store_n(&sh->txpp.err_clock_queue, 0, __ATOMIC_RELAXED); 1188 __atomic_store_n(&sh->txpp.err_ts_past, 0, __ATOMIC_RELAXED); 1189 __atomic_store_n(&sh->txpp.err_ts_future, 0, __ATOMIC_RELAXED); 1190 return 0; 1191 } 1192 1193 /** 1194 * Routine to retrieve names of extended device statistics 1195 * for packet send scheduling. It appends the specific stats names 1196 * after the parts filled by preceding modules (eth stats, etc.) 1197 * 1198 * @param dev 1199 * Pointer to Ethernet device structure. 1200 * @param[out] xstats_names 1201 * Buffer to insert names into. 1202 * @param n 1203 * Number of names. 1204 * @param n_used 1205 * Number of names filled by preceding statistics modules. 1206 * 1207 * @return 1208 * Number of xstats names. 1209 */ 1210 int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused, 1211 struct rte_eth_xstat_name *xstats_names, 1212 unsigned int n, unsigned int n_used) 1213 { 1214 unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names); 1215 unsigned int i; 1216 1217 if (n >= n_used + n_txpp && xstats_names) { 1218 for (i = 0; i < n_txpp; ++i) { 1219 strncpy(xstats_names[i + n_used].name, 1220 mlx5_txpp_stat_names[i], 1221 RTE_ETH_XSTATS_NAME_SIZE); 1222 xstats_names[i + n_used].name 1223 [RTE_ETH_XSTATS_NAME_SIZE - 1] = 0; 1224 } 1225 } 1226 return n_used + n_txpp; 1227 } 1228 1229 static inline void 1230 mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp, 1231 struct mlx5_txpp_ts *tsa, uint16_t idx) 1232 { 1233 do { 1234 uint64_t ts, ci; 1235 1236 ts = __atomic_load_n(&txpp->tsa[idx].ts, __ATOMIC_RELAXED); 1237 ci = __atomic_load_n(&txpp->tsa[idx].ci_ts, __ATOMIC_RELAXED); 1238 rte_compiler_barrier(); 1239 if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0) 1240 continue; 1241 if (__atomic_load_n(&txpp->tsa[idx].ts, 1242 __ATOMIC_RELAXED) != ts) 1243 continue; 1244 if (__atomic_load_n(&txpp->tsa[idx].ci_ts, 1245 __ATOMIC_RELAXED) != ci) 1246 continue; 1247 tsa->ts = ts; 1248 tsa->ci_ts = ci; 1249 return; 1250 } while (true); 1251 } 1252 1253 /* 1254 * Jitter reflects the clock change between 1255 * neighbours Clock Queue completions. 1256 */ 1257 static uint64_t 1258 mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp) 1259 { 1260 struct mlx5_txpp_ts tsa0, tsa1; 1261 int64_t dts, dci; 1262 uint16_t ts_p; 1263 1264 if (txpp->ts_n < 2) { 1265 /* No gathered enough reports yet. */ 1266 return 0; 1267 } 1268 do { 1269 int ts_0, ts_1; 1270 1271 ts_p = txpp->ts_p; 1272 rte_compiler_barrier(); 1273 ts_0 = ts_p - 2; 1274 if (ts_0 < 0) 1275 ts_0 += MLX5_TXPP_REARM_SQ_SIZE; 1276 ts_1 = ts_p - 1; 1277 if (ts_1 < 0) 1278 ts_1 += MLX5_TXPP_REARM_SQ_SIZE; 1279 mlx5_txpp_read_tsa(txpp, &tsa0, ts_0); 1280 mlx5_txpp_read_tsa(txpp, &tsa1, ts_1); 1281 rte_compiler_barrier(); 1282 } while (ts_p != txpp->ts_p); 1283 /* We have two neighbor reports, calculate the jitter. */ 1284 dts = tsa1.ts - tsa0.ts; 1285 dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - 1286 (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); 1287 if (dci < 0) 1288 dci += 1 << MLX5_CQ_INDEX_WIDTH; 1289 dci *= txpp->tick; 1290 return (dts > dci) ? dts - dci : dci - dts; 1291 } 1292 1293 /* 1294 * Wander reflects the long-term clock change 1295 * over the entire length of all Clock Queue completions. 
1296 */ 1297 static uint64_t 1298 mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp) 1299 { 1300 struct mlx5_txpp_ts tsa0, tsa1; 1301 int64_t dts, dci; 1302 uint16_t ts_p; 1303 1304 if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) { 1305 /* No gathered enough reports yet. */ 1306 return 0; 1307 } 1308 do { 1309 int ts_0, ts_1; 1310 1311 ts_p = txpp->ts_p; 1312 rte_compiler_barrier(); 1313 ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1; 1314 if (ts_0 < 0) 1315 ts_0 += MLX5_TXPP_REARM_SQ_SIZE; 1316 ts_1 = ts_p - 1; 1317 if (ts_1 < 0) 1318 ts_1 += MLX5_TXPP_REARM_SQ_SIZE; 1319 mlx5_txpp_read_tsa(txpp, &tsa0, ts_0); 1320 mlx5_txpp_read_tsa(txpp, &tsa1, ts_1); 1321 rte_compiler_barrier(); 1322 } while (ts_p != txpp->ts_p); 1323 /* We have two neighbor reports, calculate the jitter. */ 1324 dts = tsa1.ts - tsa0.ts; 1325 dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) - 1326 (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)); 1327 dci += 1 << MLX5_CQ_INDEX_WIDTH; 1328 dci *= txpp->tick; 1329 return (dts > dci) ? dts - dci : dci - dts; 1330 } 1331 1332 /** 1333 * Routine to retrieve extended device statistics 1334 * for packet send scheduling. It appends the specific statistics 1335 * after the parts filled by preceding modules (eth stats, etc.) 1336 * 1337 * @param dev 1338 * Pointer to Ethernet device. 1339 * @param[out] stats 1340 * Pointer to rte extended stats table. 1341 * @param n 1342 * The size of the stats table. 1343 * @param n_used 1344 * Number of stats filled by preceding statistics modules. 1345 * 1346 * @return 1347 * Number of extended stats on success and stats is filled, 1348 * negative on error and rte_errno is set. 1349 */ 1350 int 1351 mlx5_txpp_xstats_get(struct rte_eth_dev *dev, 1352 struct rte_eth_xstat *stats, 1353 unsigned int n, unsigned int n_used) 1354 { 1355 unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names); 1356 1357 if (n >= n_used + n_txpp && stats) { 1358 struct mlx5_priv *priv = dev->data->dev_private; 1359 struct mlx5_dev_ctx_shared *sh = priv->sh; 1360 unsigned int i; 1361 1362 for (i = 0; i < n_txpp; ++i) 1363 stats[n_used + i].id = n_used + i; 1364 stats[n_used + 0].value = 1365 __atomic_load_n(&sh->txpp.err_miss_int, 1366 __ATOMIC_RELAXED); 1367 stats[n_used + 1].value = 1368 __atomic_load_n(&sh->txpp.err_rearm_queue, 1369 __ATOMIC_RELAXED); 1370 stats[n_used + 2].value = 1371 __atomic_load_n(&sh->txpp.err_clock_queue, 1372 __ATOMIC_RELAXED); 1373 stats[n_used + 3].value = 1374 __atomic_load_n(&sh->txpp.err_ts_past, 1375 __ATOMIC_RELAXED); 1376 stats[n_used + 4].value = 1377 __atomic_load_n(&sh->txpp.err_ts_future, 1378 __ATOMIC_RELAXED); 1379 stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp); 1380 stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp); 1381 stats[n_used + 7].value = sh->txpp.sync_lost; 1382 } 1383 return n_used + n_txpp; 1384 } 1385