/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <fcntl.h>
#include <stdint.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_common_os.h"

static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t),
	      "Wrong timestamp CQE part size");

static const char * const mlx5_txpp_stat_names[] = {
	"tx_pp_missed_interrupt_errors", /* Missed service interrupt. */
	"tx_pp_rearm_queue_errors", /* Rearm Queue errors. */
	"tx_pp_clock_queue_errors", /* Clock Queue errors. */
	"tx_pp_timestamp_past_errors", /* Timestamp in the past. */
	"tx_pp_timestamp_future_errors", /* Timestamp in the distant future. */
	"tx_pp_jitter", /* Timestamp jitter (one Clock Queue completion). */
	"tx_pp_wander", /* Timestamp wander (half of Clock Queue CQEs). */
	"tx_pp_sync_lost", /* Scheduling synchronization lost. */
};

/* Destroy Event Queue Notification Channel. */
static void
mlx5_txpp_destroy_event_channel(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->txpp.echan) {
		mlx5_glue->devx_destroy_event_channel(sh->txpp.echan);
		sh->txpp.echan = NULL;
	}
}

/* Create Event Queue Notification Channel. */
static int
mlx5_txpp_create_event_channel(struct mlx5_dev_ctx_shared *sh)
{
	MLX5_ASSERT(!sh->txpp.echan);
	sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx,
			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!sh->txpp.echan) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create event channel %d.", rte_errno);
		return -rte_errno;
	}
	return 0;
}

static void
mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh)
{
#ifdef HAVE_MLX5DV_PP_ALLOC
	if (sh->txpp.pp) {
		mlx5_glue->dv_free_pp(sh->txpp.pp);
		sh->txpp.pp = NULL;
		sh->txpp.pp_id = 0;
	}
#else
	RTE_SET_USED(sh);
	DRV_LOG(ERR, "Freeing pacing index is not supported.");
#endif
}
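
/*
 * Note on the rate programmed below: sh->txpp.tick is the scheduling
 * granularity (the tx_pp devarg value, in nanoseconds), so NS_PER_S / tick
 * gives the number of pacing intervals per second. In test mode the value
 * is additionally converted to a kilobit-per-second rate derived from the
 * inline test packet length.
 */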

/* Allocate Packet Pacing index from kernel via mlx5dv call. */
static int
mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh)
{
#ifdef HAVE_MLX5DV_PP_ALLOC
	uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)];
	uint64_t rate;

	MLX5_ASSERT(!sh->txpp.pp);
	memset(&pp, 0, sizeof(pp));
	rate = NS_PER_S / sh->txpp.tick;
	if (rate * sh->txpp.tick != NS_PER_S)
		DRV_LOG(WARNING, "Packet pacing frequency is not precise.");
	if (sh->txpp.test) {
		uint32_t len;

		len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE,
			      (size_t)RTE_ETHER_MIN_LEN);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 burst_upper_bound, len);
		MLX5_SET(set_pp_rate_limit_context, &pp,
			 typical_packet_size, len);
		/* Convert packets per second into kilobits. */
		rate = (rate * len) / (1000ul / CHAR_BIT);
		DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate);
	}
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate);
	MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
		 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE);
	sh->txpp.pp = mlx5_glue->dv_alloc_pp
				(sh->ctx, sizeof(pp), &pp,
				 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX);
	if (sh->txpp.pp == NULL) {
		DRV_LOG(ERR, "Failed to allocate packet pacing index.");
		rte_errno = errno;
		return -errno;
	}
	if (!((struct mlx5dv_pp *)sh->txpp.pp)->index) {
		DRV_LOG(ERR, "Zero packet pacing index allocated.");
		mlx5_txpp_free_pp_index(sh);
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	sh->txpp.pp_id = ((struct mlx5dv_pp *)(sh->txpp.pp))->index;
	return 0;
#else
	RTE_SET_USED(sh);
	DRV_LOG(ERR, "Allocating pacing index is not supported.");
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}

static void
mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq)
{
	if (wq->sq)
		claim_zero(mlx5_devx_cmd_destroy(wq->sq));
	if (wq->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem));
	if (wq->sq_buf)
		mlx5_free((void *)(uintptr_t)wq->sq_buf);
	if (wq->cq)
		claim_zero(mlx5_devx_cmd_destroy(wq->cq));
	if (wq->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem));
	if (wq->cq_buf)
		mlx5_free((void *)(uintptr_t)wq->cq_buf);
	memset(wq, 0, sizeof(*wq));
}

static void
mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;

	mlx5_txpp_destroy_send_queue(wq);
}

static void
mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;

	mlx5_txpp_destroy_send_queue(wq);
	if (sh->txpp.tsa) {
		mlx5_free(sh->txpp.tsa);
		sh->txpp.tsa = NULL;
	}
}

static void
mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	union {
		uint32_t w32[2];
		uint64_t w64;
	} cs;
	void *reg_addr;

	wq->sq_ci = ci + 1;
	cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
			(wq->wqes[ci & (wq->sq_size - 1)].ctrl[0]) |
			(ci - 1) << 8);
	cs.w32[1] = wq->wqes[ci & (wq->sq_size - 1)].ctrl[1];
	/* Update SQ doorbell record with new SQ ci. */
	rte_compiler_barrier();
	*wq->sq_dbrec = rte_cpu_to_be_32(wq->sq_ci);
	/* Make sure the doorbell record is updated. */
	rte_wmb();
	/* Write to the doorbell register to start processing. */
	reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar);
	__mlx5_uar_write64_relaxed(cs.w64, reg_addr, NULL);
	rte_wmb();
}

static void
mlx5_txpp_fill_cqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	uint32_t i;

	for (i = 0; i < MLX5_TXPP_REARM_CQ_SIZE; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
}
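
/*
 * The Rearm Queue is filled with pairs of WQEs: a SEND_EN that enables the
 * next portion of "slave" WQEs in the Clock Queue SQ and a WAIT that waits
 * for the corresponding Clock Queue completion index. This keeps the Clock
 * Queue running continuously at the configured tick rate while the host is
 * only notified at the Rearm Queue pace.
 */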

static void
mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	uint32_t i;

	for (i = 0; i < wq->sq_size; i += 2) {
		struct mlx5_wqe_cseg *cs;
		struct mlx5_wqe_qseg *qs;
		uint32_t index;

		/* Build SEND_EN request with slave WQE index. */
		cs = &wqe[i + 0].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ALWAYS <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) &
			((1 << MLX5_WQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.sq->id);
		/* Build WAIT request with slave CQE index. */
		cs = &wqe[i + 1].cseg;
		cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0);
		cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) | 2);
		cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR <<
				     MLX5_COMP_MODE_OFFSET);
		cs->misc = RTE_BE32(0);
		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) &
			((1 << MLX5_CQ_INDEX_WIDTH) - 1);
		qs->max_index = rte_cpu_to_be_32(index);
		qs->qpn_cqn = rte_cpu_to_be_32(sh->txpp.clock_queue.cq->id);
	}
}

/* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */
static int
mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_REARM_CQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create completion queue object for Rearm Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_REARM_CQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Rearm Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	wq->arm_sn = 0;
	/* Mark all CQEs initially as invalid. */
	mlx5_txpp_fill_cqe_rearm_queue(sh);
	/*
	 * Allocate memory buffer for Send Queue WQEs.
	 * There should be no WQE leftovers in the cyclic queue.
	 */
	wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE;
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Rearm Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Rearm Queue.");
		goto error;
	}
	/* Create send queue object for Rearm Queue. */
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.cqn = wq->cq->id;
	sq_attr.cd_master = 1;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Rearm Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before going to the Ready state. */
	mlx5_txpp_fill_wqe_rearm_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Rearm Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_rearm_queue(sh);
	rte_errno = -ret;
	return ret;
}

static void
mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
	struct mlx5_wqe_cseg *cs = &wqe->cseg;
	uint32_t wqe_size, opcode, i;
	uint8_t *dst;

	/* For test purposes fill the WQ with an inline SEND packet. */
	if (sh->txpp.test) {
		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
				     MLX5_WQE_CSEG_SIZE +
				     2 * MLX5_WQE_ESEG_SIZE -
				     MLX5_ESEG_MIN_INLINE_SIZE,
				     MLX5_WSEG_SIZE);
		opcode = MLX5_OPCODE_SEND;
	} else {
		wqe_size = MLX5_WSEG_SIZE;
		opcode = MLX5_OPCODE_NOP;
	}
	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
	cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) |
				     (wqe_size / MLX5_WSEG_SIZE));
	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE);
	if (sh->txpp.test) {
		struct mlx5_wqe_eseg *es = &wqe->eseg;
		struct rte_ether_hdr *eth_hdr;
		struct rte_ipv4_hdr *ip_hdr;
		struct rte_udp_hdr *udp_hdr;

		/* Build the inline test packet pattern. */
		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
			    (sizeof(struct rte_ether_hdr) +
			     sizeof(struct rte_ipv4_hdr)));
		es->flags = 0;
		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		es->swp_offs = 0;
		es->metadata = 0;
		es->swp_flags = 0;
		es->mss = 0;
		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
		/* Build test packet L2 header (Ethernet). */
		dst = (uint8_t *)&es->inline_data;
		eth_hdr = (struct rte_ether_hdr *)dst;
		rte_eth_random_addr(&eth_hdr->d_addr.addr_bytes[0]);
		rte_eth_random_addr(&eth_hdr->s_addr.addr_bytes[0]);
		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		/* Build test packet L3 header (IP v4). */
		dst += sizeof(struct rte_ether_hdr);
		ip_hdr = (struct rte_ipv4_hdr *)dst;
		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
		ip_hdr->type_of_service = 0;
		ip_hdr->fragment_offset = 0;
		ip_hdr->time_to_live = 64;
		ip_hdr->next_proto_id = IPPROTO_UDP;
		ip_hdr->packet_id = 0;
		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
						sizeof(struct rte_ether_hdr));
		/* Use RFC5735 / RFC2544 reserved network test addresses. */
		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 1);
		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
					    (0 << 8) | 2);
		if (MLX5_TXPP_TEST_PKT_SIZE <
		    (sizeof(struct rte_ether_hdr) +
		     sizeof(struct rte_ipv4_hdr) +
		     sizeof(struct rte_udp_hdr)))
			goto wcopy;
		/* Build test packet L4 header (UDP). */
		dst += sizeof(struct rte_ipv4_hdr);
		udp_hdr = (struct rte_udp_hdr *)dst;
		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
		udp_hdr->dst_port = RTE_BE16(9);
		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
					      sizeof(struct rte_ether_hdr) -
					      sizeof(struct rte_ipv4_hdr));
		udp_hdr->dgram_cksum = 0;
		/* Fill the test packet data. */
		dst += sizeof(struct rte_udp_hdr);
		for (i = sizeof(struct rte_ether_hdr) +
			 sizeof(struct rte_ipv4_hdr) +
			 sizeof(struct rte_udp_hdr);
		     i < MLX5_TXPP_TEST_PKT_SIZE; i++)
			*dst++ = (uint8_t)(i & 0xFF);
	}
wcopy:
	/* Duplicate the pattern to the next WQEs. */
	dst = (uint8_t *)(uintptr_t)wq->sq_buf;
	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
		dst += wqe_size;
		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size);
	}
}
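
/*
 * The Clock Queue is the time base of the scheduling: in normal mode it is
 * created as a non-wire SQ filled with NOP WQEs, so completions are produced
 * at the pacing rate without sending anything to the wire; in test mode
 * (negative tx_pp) it actually transmits the inline test packets built above.
 */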

/* Creates the Clock Queue for packet pacing, returns zero on success. */
static int
mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	size_t page_size;
	uint32_t umem_size, umem_dbrec;
	int ret;

	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return -ENOMEM;
	}
	sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				   MLX5_TXPP_REARM_SQ_SIZE *
				   sizeof(struct mlx5_txpp_ts),
				   0, sh->numa_node);
	if (!sh->txpp.tsa) {
		DRV_LOG(ERR, "Failed to allocate memory for CQ stats.");
		return -ENOMEM;
	}
	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	/* Allocate memory buffer for CQEs and doorbell record. */
	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->cq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		return -ENOMEM;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->cq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create completion queue object for Clock Queue. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			   MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.use_first_only = 1;
	cq_attr.overrun_ignore = 1;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = 0;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = umem_dbrec;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(wq->cq_umem);
	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!wq->cq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
		goto error;
	}
	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
	wq->cq_ci = 0;
	/* Allocate memory buffer for Send Queue WQEs. */
	if (sh->txpp.test) {
		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
					MLX5_WQE_CSEG_SIZE +
					2 * MLX5_WQE_ESEG_SIZE -
					MLX5_ESEG_MIN_INLINE_SIZE,
					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
	} else {
		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
	}
	/* There should not be WQE leftovers in the cyclic queue. */
	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
	umem_size = MLX5_WQE_SIZE * wq->sq_size;
	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
	umem_size += MLX5_DBR_SIZE;
	wq->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				 page_size, sh->numa_node);
	if (!wq->sq_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
		rte_errno = ENOMEM;
		goto error;
	}
	/* Register allocated buffer in user space with DevX. */
	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
					       (void *)(uintptr_t)wq->sq_buf,
					       umem_size,
					       IBV_ACCESS_LOCAL_WRITE);
	if (!wq->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
		goto error;
	}
	/* Create send queue object for Clock Queue. */
	if (sh->txpp.test) {
		sq_attr.tis_lst_sz = 1;
		sq_attr.tis_num = sh->tis->id;
		sq_attr.non_wire = 0;
		sq_attr.static_sq_wq = 1;
	} else {
		sq_attr.non_wire = 1;
		sq_attr.static_sq_wq = 1;
	}
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = wq->cq->id;
	sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
	sq_attr.wq_attr.cd_slave = 1;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = umem_dbrec;
	sq_attr.wq_attr.dbr_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(wq->sq_umem);
	/* umem_offset must be zero for static_sq_wq queue. */
	sq_attr.wq_attr.wq_umem_offset = 0;
	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!wq->sq) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
		goto error;
	}
	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
				   MLX5_SND_DBR * sizeof(uint32_t));
	/* Build the WQEs in the Send Queue before going to the Ready state. */
	mlx5_txpp_fill_wqe_clock_queue(sh);
	/* Change queue state to ready. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	wq->sq_ci = 0;
	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
	if (ret) {
		DRV_LOG(ERR, "Failed to set SQ ready state Clock Queue.");
		goto error;
	}
	return 0;
error:
	ret = -rte_errno;
	mlx5_txpp_destroy_clock_queue(sh);
	rte_errno = -ret;
	return ret;
}
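
/*
 * Arming the Rearm Queue CQ follows the usual mlx5 scheme: the arm sequence
 * number and consumer index are stored in the CQ arm doorbell record, and
 * the same value, together with the CQ number, is written as a 64-bit
 * big-endian word to the MLX5_CQ_DOORBELL offset of the UAR page.
 */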

/* Enable notification from the Rearm Queue CQ. */
static inline void
mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
{
	void *base_addr;

	struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
	uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
	uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
	uint64_t db_be = rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq->id);
	base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
	uint32_t *addr = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL);

	rte_compiler_barrier();
	aq->cq_dbrec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(db_hi);
	rte_wmb();
#ifdef RTE_ARCH_64
	*(uint64_t *)addr = db_be;
#else
	*(uint32_t *)addr = db_be;
	rte_io_wmb();
	*((uint32_t *)addr + 1) = db_be >> 32;
#endif
	aq->arm_sn++;
}

#if defined(RTE_ARCH_X86_64)
static inline int
mlx5_atomic128_compare_exchange(rte_int128_t *dst,
				rte_int128_t *exp,
				const rte_int128_t *src)
{
	uint8_t res;

	asm volatile (MPLOCKED
		      "cmpxchg16b %[dst];"
		      " sete %[res]"
		      : [dst] "=m" (dst->val[0]),
			"=a" (exp->val[0]),
			"=d" (exp->val[1]),
			[res] "=r" (res)
		      : "b" (src->val[0]),
			"c" (src->val[1]),
			"a" (exp->val[0]),
			"d" (exp->val[1]),
			"m" (dst->val[0])
		      : "memory");

	return res;
}
#endif

static inline void
mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts)
{
	/*
	 * The only CQE of the Clock Queue is continuously updated by
	 * the hardware at the specified rate. The timestamp and the WQE
	 * completion index have to be read atomically.
	 */
#if defined(RTE_ARCH_X86_64)
	rte_int128_t src;

	memset(&src, 0, sizeof(src));
	*ts = src;
	/* if (*from == *ts) *from = *src else *ts = *from; */
	mlx5_atomic128_compare_exchange(from, ts, &src);
#else
	uint64_t *cqe = (uint64_t *)from;

	/*
	 * Power architecture does not provide a 16B compare-and-swap and
	 * ARM implements it in software, so the read-and-recheck loop
	 * below is more suitable there.
	 */
	for (;;) {
		uint64_t tm, op;
		uint64_t *ps;

		rte_compiler_barrier();
		tm = __atomic_load_n(cqe + 0, __ATOMIC_RELAXED);
		op = __atomic_load_n(cqe + 1, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (tm != __atomic_load_n(cqe + 0, __ATOMIC_RELAXED))
			continue;
		if (op != __atomic_load_n(cqe + 1, __ATOMIC_RELAXED))
			continue;
		ps = (uint64_t *)ts;
		ps[0] = tm;
		ps[1] = op;
		return;
	}
#endif
}
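
/*
 * The cached ci_ts word packs both values: the upper MLX5_CQ_INDEX_WIDTH
 * bits hold the Clock Queue completion counter and the remaining lower bits
 * hold the corresponding timestamp, which lets a reader fetch a consistent
 * index/timestamp pair with a single 64-bit load.
 */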

/* Stores timestamp in the cache structure to share data with datapath. */
static inline void
mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh,
			  uint64_t ts, uint64_t ci)
{
	ci = ci << (64 - MLX5_CQ_INDEX_WIDTH);
	ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH;
	rte_compiler_barrier();
	__atomic_store_n(&sh->txpp.ts.ts, ts, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.ts.ci_ts, ci, __ATOMIC_RELAXED);
	rte_wmb();
}

/* Reads the timestamp from the Clock Queue CQE and stores it in the cache. */
static inline void
mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
	union {
		rte_int128_t u128;
		struct mlx5_cqe_ts cts;
	} to;
	uint64_t ts;
	uint16_t ci;

	mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
	if (to.cts.op_own >> 4) {
		DRV_LOG(DEBUG, "Clock Queue error sync lost.");
		__atomic_fetch_add(&sh->txpp.err_clock_queue,
				   1, __ATOMIC_RELAXED);
		sh->txpp.sync_lost = 1;
		return;
	}
	ci = rte_be_to_cpu_16(to.cts.wqe_counter);
	ts = rte_be_to_cpu_64(to.cts.timestamp);
	ts = mlx5_txpp_convert_rx_ts(sh, ts);
	wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX;
	wq->sq_ci = ci;
	mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci);
}

/* Waits for the first completion on the Clock Queue to init the timestamp. */
static inline void
mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
	uint32_t wait;

	sh->txpp.ts_p = 0;
	sh->txpp.ts_n = 0;
	for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) {
		mlx5_txpp_update_timestamp(sh);
		if (wq->sq_ci)
			return;
		/* Wait one millisecond and try again. */
		rte_delay_us_sleep(US_PER_S / MS_PER_S);
	}
	DRV_LOG(ERR, "Unable to initialize timestamp.");
	sh->txpp.sync_lost = 1;
}
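
/*
 * The service routines below run from the interrupt handler on Rearm Queue
 * events. Sampled timestamps are stored into the sh->txpp.tsa circular
 * buffer (MLX5_TXPP_REARM_SQ_SIZE entries) and are later used to compute
 * the tx_pp_jitter and tx_pp_wander extended statistics.
 */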

#ifdef HAVE_IBV_DEVX_EVENT
/* Gather statistics for timestamp from Clock Queue CQE. */
static inline void
mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh)
{
	/* Check whether we have a valid timestamp. */
	if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n)
		return;
	MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE);
	__atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ts,
			 sh->txpp.ts.ts, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts,
			 sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
	if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE)
		sh->txpp.ts_p = 0;
	if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE)
		++sh->txpp.ts_n;
}

/* Handles Rearm Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
	uint32_t cq_ci = wq->cq_ci;
	bool error = false;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &wq->cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)];
		ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci);
		switch (ret) {
		case MLX5_CQE_STATUS_ERR:
			error = true;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_SW_OWN:
			wq->sq_ci += 2;
			++cq_ci;
			break;
		case MLX5_CQE_STATUS_HW_OWN:
			break;
		default:
			MLX5_ASSERT(false);
			break;
		}
	} while (ret != MLX5_CQE_STATUS_HW_OWN);
	if (likely(cq_ci != wq->cq_ci)) {
		/* Check whether we have missed interrupts. */
		if (cq_ci - wq->cq_ci != 1) {
			DRV_LOG(DEBUG, "Rearm Queue missed interrupt.");
			__atomic_fetch_add(&sh->txpp.err_miss_int,
					   1, __ATOMIC_RELAXED);
			/* Check whether sync was lost on the WQE index. */
			if (cq_ci - wq->cq_ci >=
			    (((1UL << MLX5_WQ_INDEX_WIDTH) /
			      MLX5_TXPP_REARM) - 1))
				error = true;
		}
		/* Update doorbell record to notify hardware. */
		rte_compiler_barrier();
		*wq->cq_dbrec = rte_cpu_to_be_32(cq_ci);
		rte_wmb();
		wq->cq_ci = cq_ci;
		/* Fire new requests to Rearm Queue. */
		if (error) {
			DRV_LOG(DEBUG, "Rearm Queue error sync lost.");
			__atomic_fetch_add(&sh->txpp.err_rearm_queue,
					   1, __ATOMIC_RELAXED);
			sh->txpp.sync_lost = 1;
		}
	}
}

/* Handles Clock Queue completions in periodic service. */
static __rte_always_inline void
mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_update_timestamp(sh);
	mlx5_txpp_gather_timestamp(sh);
}
#endif

/* Invoked periodically on Rearm Queue completions. */
void
mlx5_txpp_interrupt_handler(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_EVENT
	RTE_SET_USED(cb_arg);
	return;
#else
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Process events in the loop. Only rearm completions are expected. */
	while (mlx5_glue->devx_get_event
				(sh->txpp.echan,
				 &out.event_resp,
				 sizeof(out.buf)) >=
				 (ssize_t)sizeof(out.event_resp.cookie)) {
		mlx5_txpp_handle_rearm_queue(sh);
		mlx5_txpp_handle_clock_queue(sh);
		mlx5_txpp_cq_arm(sh);
		mlx5_txpp_doorbell_rearm_queue
					(sh, sh->txpp.rearm_queue.sq_ci - 1);
	}
#endif /* HAVE_IBV_DEVX_EVENT */
}

static void
mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->txpp.intr_handle.fd)
		return;
	mlx5_intr_callback_unregister(&sh->txpp.intr_handle,
				      mlx5_txpp_interrupt_handler, sh);
	sh->txpp.intr_handle.fd = 0;
}

/* Attach interrupt handler and fire the first request to Rearm Queue. */
static int
mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh)
{
	uint16_t event_nums[1] = {0};
	int ret;
	int fd;

	sh->txpp.err_miss_int = 0;
	sh->txpp.err_rearm_queue = 0;
	sh->txpp.err_clock_queue = 0;
	sh->txpp.err_ts_past = 0;
	sh->txpp.err_ts_future = 0;
	/* Attach interrupt handler to process Rearm Queue completions. */
	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
	ret = mlx5_os_set_nonblock_channel_fd(fd);
	if (ret) {
		DRV_LOG(ERR, "Failed to change event channel FD.");
		rte_errno = errno;
		return -rte_errno;
	}
	memset(&sh->txpp.intr_handle, 0, sizeof(sh->txpp.intr_handle));
	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
	sh->txpp.intr_handle.fd = fd;
	sh->txpp.intr_handle.type = RTE_INTR_HANDLE_EXT;
	if (rte_intr_callback_register(&sh->txpp.intr_handle,
				       mlx5_txpp_interrupt_handler, sh)) {
		sh->txpp.intr_handle.fd = 0;
		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
		return -rte_errno;
	}
	/* Subscribe CQ event to the event channel controlled by the driver. */
	ret = mlx5_glue->devx_subscribe_devx_event(sh->txpp.echan,
						   sh->txpp.rearm_queue.cq->obj,
						   sizeof(event_nums),
						   event_nums, 0);
	if (ret) {
		DRV_LOG(ERR, "Failed to subscribe CQE event.");
		rte_errno = errno;
		return -errno;
	}
	/* Enable interrupts in the CQ. */
	mlx5_txpp_cq_arm(sh);
	/* Fire the first request on Rearm Queue. */
	mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1);
	mlx5_txpp_init_timestamp(sh);
	return 0;
}

/*
 * The routine initializes the packet pacing infrastructure:
 * - allocates PP context
 * - Clock CQ/SQ
 * - Rearm CQ/SQ
 * - attaches rearm interrupt handler
 * - starts Clock Queue
 *
 * Returns 0 on success, negative otherwise
 */
static int
mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv)
{
	int tx_pp = priv->config.tx_pp;
	int ret;

	/* Store the requested pacing parameters. */
	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
	sh->txpp.test = !!(tx_pp < 0);
	sh->txpp.skew = priv->config.tx_skew;
	sh->txpp.freq = priv->config.hca_attr.dev_freq_khz;
	ret = mlx5_txpp_create_event_channel(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_alloc_pp_index(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_clock_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_create_rearm_queue(sh);
	if (ret)
		goto exit;
	ret = mlx5_txpp_start_service(sh);
	if (ret)
		goto exit;
exit:
	if (ret) {
		mlx5_txpp_stop_service(sh);
		mlx5_txpp_destroy_rearm_queue(sh);
		mlx5_txpp_destroy_clock_queue(sh);
		mlx5_txpp_free_pp_index(sh);
		mlx5_txpp_destroy_event_channel(sh);
		sh->txpp.tick = 0;
		sh->txpp.test = 0;
		sh->txpp.skew = 0;
	}
	return ret;
}

/*
 * The routine destroys the packet pacing infrastructure:
 * - detaches rearm interrupt handler
 * - Rearm CQ/SQ
 * - Clock CQ/SQ
 * - PP context
 */
static void
mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_txpp_stop_service(sh);
	mlx5_txpp_destroy_rearm_queue(sh);
	mlx5_txpp_destroy_clock_queue(sh);
	mlx5_txpp_free_pp_index(sh);
	mlx5_txpp_destroy_event_channel(sh);
	sh->txpp.tick = 0;
	sh->txpp.test = 0;
	sh->txpp.skew = 0;
}

/**
 * Creates and starts packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_txpp_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int err = 0;
	int ret;

	if (!priv->config.tx_pp) {
		/* Packet pacing is not requested for the device. */
		MLX5_ASSERT(priv->txpp_en == 0);
		return 0;
	}
	if (priv->txpp_en) {
		/* Packet pacing is already enabled for the device. */
		MLX5_ASSERT(sh->txpp.refcnt);
		return 0;
	}
	if (priv->config.tx_pp > 0) {
		ret = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
		if (ret < 0)
			return 0;
	}
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	if (sh->txpp.refcnt) {
		priv->txpp_en = 1;
		++sh->txpp.refcnt;
	} else {
		err = mlx5_txpp_create(sh, priv);
		if (!err) {
			MLX5_ASSERT(sh->txpp.tick);
			priv->txpp_en = 1;
			sh->txpp.refcnt = 1;
		} else {
			rte_errno = -err;
		}
	}
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	return err;
}
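
/*
 * Usage sketch (editorial note, not compiled): with pacing enabled via the
 * tx_pp devarg, an application schedules a packet by setting the dynamic
 * Tx timestamp field and flag on the mbuf before transmission, e.g.:
 *
 *   int ts_off  = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
 *                                          NULL);
 *   int ts_flag = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME,
 *                                         NULL);
 *   ...
 *   *RTE_MBUF_DYNFIELD(mbuf, ts_off, uint64_t *) = desired_time;
 *   mbuf->ol_flags |= 1ULL << ts_flag;
 *   rte_eth_tx_burst(port_id, queue_id, &mbuf, 1);
 *
 * The field/flag names above are the generic rte_mbuf_dyn ones and are an
 * assumption here; the driver itself only checks the Tx timestamp dynamic
 * flag registration in mlx5_txpp_start().
 */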

/**
 * Stops and destroys packet pacing infrastructure on specified device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_txpp_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (!priv->txpp_en) {
		/* Packet pacing is already disabled for the device. */
		return;
	}
	priv->txpp_en = 0;
	ret = pthread_mutex_lock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
	MLX5_ASSERT(sh->txpp.refcnt);
	if (!sh->txpp.refcnt || --sh->txpp.refcnt)
		goto exit;
	/* No references any more, do actual destroy. */
	mlx5_txpp_destroy(sh);
exit:
	/* Release the mutex on all paths, including the early exit above. */
	ret = pthread_mutex_unlock(&sh->txpp.mutex);
	MLX5_ASSERT(!ret);
	RTE_SET_USED(ret);
}

/*
 * Read the current clock counter of an Ethernet device.
 *
 * This returns the current raw clock value of an Ethernet device. It is
 * a raw amount of ticks, with no given time reference.
 * The value returned here is from the same clock as the one
 * filling the timestamp field of Rx/Tx packets when using hardware timestamp
 * offload. Therefore it can be used to compute a precise conversion of
 * the device clock to the real time.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param timestamp
 *   Pointer to the uint64_t that holds the raw clock value.
 *
 * @return
 *   - 0: Success.
 *   - -ENOTSUP: The function is not supported in this mode. Requires
 *     packet pacing module configured and started (tx_pp devarg)
 */
int
mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	if (sh->txpp.refcnt) {
		struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
		struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cqes;
		union {
			rte_int128_t u128;
			struct mlx5_cqe_ts cts;
		} to;
		uint64_t ts;

		mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
		if (to.cts.op_own >> 4) {
			DRV_LOG(DEBUG, "Clock Queue error sync lost.");
			__atomic_fetch_add(&sh->txpp.err_clock_queue,
					   1, __ATOMIC_RELAXED);
			sh->txpp.sync_lost = 1;
			return -EIO;
		}
		ts = rte_be_to_cpu_64(to.cts.timestamp);
		ts = mlx5_txpp_convert_rx_ts(sh, ts);
		*timestamp = ts;
		return 0;
	}
	/* Not supported in isolated mode - kernel does not see the CQEs. */
	if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -ENOTSUP;
	ret = mlx5_read_clock(dev, timestamp);
	return ret;
}
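
/*
 * The routines below are assumed to serve the device xstats callbacks for
 * the packet pacing counters listed in mlx5_txpp_stat_names[]: they reset,
 * name and fill these statistics after the entries produced by the
 * preceding statistics modules.
 */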

/**
 * DPDK callback to clear device extended statistics.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success and stats is reset, negative errno value otherwise and
 *   rte_errno is set.
 */
int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	__atomic_store_n(&sh->txpp.err_miss_int, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.err_rearm_queue, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.err_clock_queue, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.err_ts_past, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&sh->txpp.err_ts_future, 0, __ATOMIC_RELAXED);
	return 0;
}

/**
 * Routine to retrieve names of extended device statistics
 * for packet send scheduling. It appends the specific stats names
 * after the parts filled by preceding modules (eth stats, etc.).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] xstats_names
 *   Buffer to insert names into.
 * @param n
 *   Number of names.
 * @param n_used
 *   Number of names filled by preceding statistics modules.
 *
 * @return
 *   Number of xstats names.
 */
int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
			       struct rte_eth_xstat_name *xstats_names,
			       unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);
	unsigned int i;

	if (n >= n_used + n_txpp && xstats_names) {
		for (i = 0; i < n_txpp; ++i) {
			strncpy(xstats_names[i + n_used].name,
				mlx5_txpp_stat_names[i],
				RTE_ETH_XSTATS_NAME_SIZE);
			xstats_names[i + n_used].name
					[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
		}
	}
	return n_used + n_txpp;
}

static inline void
mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp,
		   struct mlx5_txpp_ts *tsa, uint16_t idx)
{
	do {
		uint64_t ts, ci;

		ts = __atomic_load_n(&txpp->tsa[idx].ts, __ATOMIC_RELAXED);
		ci = __atomic_load_n(&txpp->tsa[idx].ci_ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0)
			continue;
		if (__atomic_load_n(&txpp->tsa[idx].ts,
				    __ATOMIC_RELAXED) != ts)
			continue;
		if (__atomic_load_n(&txpp->tsa[idx].ci_ts,
				    __ATOMIC_RELAXED) != ci)
			continue;
		tsa->ts = ts;
		tsa->ci_ts = ci;
		return;
	} while (true);
}

/*
 * Jitter reflects the clock change between
 * neighbouring Clock Queue completions.
 */
static uint64_t
mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp)
{
	struct mlx5_txpp_ts tsa0, tsa1;
	int64_t dts, dci;
	uint16_t ts_p;

	if (txpp->ts_n < 2) {
		/* Not enough reports gathered yet. */
		return 0;
	}
	do {
		int ts_0, ts_1;

		ts_p = txpp->ts_p;
		rte_compiler_barrier();
		ts_0 = ts_p - 2;
		if (ts_0 < 0)
			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
		ts_1 = ts_p - 1;
		if (ts_1 < 0)
			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
		rte_compiler_barrier();
	} while (ts_p != txpp->ts_p);
	/* We have two neighbour reports, calculate the jitter. */
	dts = tsa1.ts - tsa0.ts;
	dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH));
	if (dci < 0)
		dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}
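
/*
 * Both estimators above and below compare the measured timestamp delta (dts)
 * between two stored reports against the delta expected from the completion
 * counter difference multiplied by the configured tick (dci), and return the
 * absolute difference between the two values.
 */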

/*
 * Wander reflects the long-term clock change
 * over half of the stored Clock Queue completions history.
 */
static uint64_t
mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp)
{
	struct mlx5_txpp_ts tsa0, tsa1;
	int64_t dts, dci;
	uint16_t ts_p;

	if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) {
		/* Not enough reports gathered yet. */
		return 0;
	}
	do {
		int ts_0, ts_1;

		ts_p = txpp->ts_p;
		rte_compiler_barrier();
		ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1;
		if (ts_0 < 0)
			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
		ts_1 = ts_p - 1;
		if (ts_1 < 0)
			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
		rte_compiler_barrier();
	} while (ts_p != txpp->ts_p);
	/* We have two reports, calculate the wander. */
	dts = tsa1.ts - tsa0.ts;
	dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) -
	      (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH));
	dci += 1 << MLX5_CQ_INDEX_WIDTH;
	dci *= txpp->tick;
	return (dts > dci) ? dts - dci : dci - dts;
}

/**
 * Routine to retrieve extended device statistics
 * for packet send scheduling. It appends the specific statistics
 * after the parts filled by preceding modules (eth stats, etc.).
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] stats
 *   Pointer to rte extended stats table.
 * @param n
 *   The size of the stats table.
 * @param n_used
 *   Number of stats filled by preceding statistics modules.
 *
 * @return
 *   Number of extended stats on success and stats is filled,
 *   negative on error and rte_errno is set.
 */
int
mlx5_txpp_xstats_get(struct rte_eth_dev *dev,
		     struct rte_eth_xstat *stats,
		     unsigned int n, unsigned int n_used)
{
	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);

	if (n >= n_used + n_txpp && stats) {
		struct mlx5_priv *priv = dev->data->dev_private;
		struct mlx5_dev_ctx_shared *sh = priv->sh;
		unsigned int i;

		for (i = 0; i < n_txpp; ++i)
			stats[n_used + i].id = n_used + i;
		stats[n_used + 0].value =
			__atomic_load_n(&sh->txpp.err_miss_int,
					__ATOMIC_RELAXED);
		stats[n_used + 1].value =
			__atomic_load_n(&sh->txpp.err_rearm_queue,
					__ATOMIC_RELAXED);
		stats[n_used + 2].value =
			__atomic_load_n(&sh->txpp.err_clock_queue,
					__ATOMIC_RELAXED);
		stats[n_used + 3].value =
			__atomic_load_n(&sh->txpp.err_ts_past,
					__ATOMIC_RELAXED);
		stats[n_used + 4].value =
			__atomic_load_n(&sh->txpp.err_ts_future,
					__ATOMIC_RELAXED);
		stats[n_used + 5].value = mlx5_txpp_xstats_jitter(&sh->txpp);
		stats[n_used + 6].value = mlx5_txpp_xstats_wander(&sh->txpp);
		stats[n_used + 7].value = sh->txpp.sync_lost;
	}
	return n_used + n_txpp;
}