/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <unistd.h>
#include <sys/types.h>
#include <stdbool.h>
#include <math.h>

#include <rte_string_fns.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_metrics.h>
#include <rte_memzone.h>
#include <rte_lcore.h>

#include "rte_latencystats.h"

/** Nanoseconds per second */
#define NS_PER_SEC 1E9

/** Clock cycles per nanosecond */
static uint64_t
latencystat_cycles_per_ns(void)
{
	return rte_get_timer_hz() / NS_PER_SEC;
}

/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_LATENCY_STATS RTE_LOGTYPE_USER1

static uint64_t timestamp_dynflag;
static int timestamp_dynfield_offset = -1;

static inline rte_mbuf_timestamp_t *
timestamp_dynfield(struct rte_mbuf *mbuf)
{
	return RTE_MBUF_DYNFIELD(mbuf,
			timestamp_dynfield_offset, rte_mbuf_timestamp_t *);
}

static const char *MZ_RTE_LATENCY_STATS = "rte_latencystats";
static int latency_stats_index;
static uint64_t samp_intvl;
static uint64_t timer_tsc;
static uint64_t prev_tsc;

struct rte_latency_stats {
	float min_latency; /**< Minimum latency in nanoseconds */
	float avg_latency; /**< Average latency in nanoseconds */
	float max_latency; /**< Maximum latency in nanoseconds */
	float jitter; /**< Latency variation */
	rte_spinlock_t lock; /**< Latency calculation lock */
};

static struct rte_latency_stats *glob_stats;

struct rxtx_cbs {
	const struct rte_eth_rxtx_callback *cb;
};

static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
static struct rxtx_cbs tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];

struct latency_stats_nameoff {
	char name[RTE_ETH_XSTATS_NAME_SIZE];
	unsigned int offset;
};

static const struct latency_stats_nameoff lat_stats_strings[] = {
	{"min_latency_ns", offsetof(struct rte_latency_stats, min_latency)},
	{"avg_latency_ns", offsetof(struct rte_latency_stats, avg_latency)},
	{"max_latency_ns", offsetof(struct rte_latency_stats, max_latency)},
	{"jitter_ns", offsetof(struct rte_latency_stats, jitter)},
};

#define NUM_LATENCY_STATS (sizeof(lat_stats_strings) / \
				sizeof(lat_stats_strings[0]))

int32_t
rte_latencystats_update(void)
{
	unsigned int i;
	float *stats_ptr = NULL;
	uint64_t values[NUM_LATENCY_STATS] = {0};
	int ret;

	/* Convert the accumulated cycle counts to nanoseconds for export. */
	for (i = 0; i < NUM_LATENCY_STATS; i++) {
		stats_ptr = RTE_PTR_ADD(glob_stats,
				lat_stats_strings[i].offset);
		values[i] = (uint64_t)floor((*stats_ptr)/
				latencystat_cycles_per_ns());
	}

	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL,
					latency_stats_index,
					values, NUM_LATENCY_STATS);
	if (ret < 0)
		RTE_LOG(INFO, LATENCY_STATS, "Failed to push the stats\n");

	return ret;
}

static void
rte_latencystats_fill_values(struct rte_metric_value *values)
{
	unsigned int i;
	float *stats_ptr = NULL;

	for (i = 0; i < NUM_LATENCY_STATS; i++) {
		stats_ptr = RTE_PTR_ADD(glob_stats,
				lat_stats_strings[i].offset);
		values[i].key = i;
		values[i].value = (uint64_t)floor((*stats_ptr)/
				latencystat_cycles_per_ns());
	}
}
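/*
 * Illustrative arithmetic (editor's sketch, not part of the library):
 * the sampling interval passed to rte_latencystats_init() is expressed in
 * nanoseconds and converted once to TSC cycles. Assuming a hypothetical
 * 2 GHz timer, latencystat_cycles_per_ns() yields 2 (note the uint64_t
 * return truncates for clocks that are not an integral number of GHz), so:
 *
 *	app_samp_intvl = 1000000;	// 1 ms, in ns
 *	samp_intvl = app_samp_intvl *
 *			latencystat_cycles_per_ns();	// 2000000 cycles
 *
 * add_time_stamps() below accumulates elapsed cycles in timer_tsc and
 * stamps one received mbuf each time that accumulator reaches samp_intvl.
 */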
static uint16_t
add_time_stamps(uint16_t pid __rte_unused,
		uint16_t qid __rte_unused,
		struct rte_mbuf **pkts,
		uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused,
		void *user_cb __rte_unused)
{
	unsigned int i;
	uint64_t diff_tsc, now;

	/*
	 * For every sample interval,
	 * time stamp is marked on one received packet.
	 */
	now = rte_rdtsc();
	for (i = 0; i < nb_pkts; i++) {
		diff_tsc = now - prev_tsc;
		timer_tsc += diff_tsc;

		if ((pkts[i]->ol_flags & timestamp_dynflag) == 0
				&& (timer_tsc >= samp_intvl)) {
			*timestamp_dynfield(pkts[i]) = now;
			pkts[i]->ol_flags |= timestamp_dynflag;
			timer_tsc = 0;
		}
		prev_tsc = now;
		now = rte_rdtsc();
	}

	return nb_pkts;
}

static uint16_t
calc_latency(uint16_t pid __rte_unused,
		uint16_t qid __rte_unused,
		struct rte_mbuf **pkts,
		uint16_t nb_pkts,
		void *_ __rte_unused)
{
	unsigned int i, cnt = 0;
	uint64_t now;
	float latency[nb_pkts];
	static float prev_latency;
	/*
	 * Alpha represents the degree of weighting decrease in EWMA,
	 * a constant smoothing factor between 0 and 1. The value
	 * is used below for measuring average latency.
	 */
	const float alpha = 0.2;

	now = rte_rdtsc();
	for (i = 0; i < nb_pkts; i++) {
		if (pkts[i]->ol_flags & timestamp_dynflag)
			latency[cnt++] = now - *timestamp_dynfield(pkts[i]);
	}

	rte_spinlock_lock(&glob_stats->lock);
	for (i = 0; i < cnt; i++) {
		/*
		 * The jitter is calculated as the statistical mean of the
		 * inter-packet delay variation. The "jitter estimate" is
		 * computed by taking the absolute values of the ipdv sequence
		 * and applying an exponential filter with parameter 1/16 to
		 * generate the estimate, i.e. J = J + (|D(i-1,i)| - J)/16,
		 * where J is the jitter and D(i-1,i) is the difference in
		 * latency of two consecutive packets i-1 and i.
		 * Reference: calculated as per RFC 5481 sec 4.1,
		 * RFC 3393 sec 4.5 and RFC 1889.
		 */
		glob_stats->jitter += (fabsf(prev_latency - latency[i])
					- glob_stats->jitter)/16;
		if (glob_stats->min_latency == 0)
			glob_stats->min_latency = latency[i];
		else if (latency[i] < glob_stats->min_latency)
			glob_stats->min_latency = latency[i];
		else if (latency[i] > glob_stats->max_latency)
			glob_stats->max_latency = latency[i];
		/*
		 * The average latency is measured using an exponential
		 * moving average, i.e. EWMA:
		 * https://en.wikipedia.org/wiki/Moving_average
		 */
		glob_stats->avg_latency +=
			alpha * (latency[i] - glob_stats->avg_latency);
		prev_latency = latency[i];
	}
	rte_spinlock_unlock(&glob_stats->lock);

	return nb_pkts;
}
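/*
 * Worked example of the two filters above (editor's sketch with made-up
 * numbers, not part of the library). With alpha = 0.2, a previous
 * avg_latency of 1000 cycles and a new sample of 1500 cycles:
 *
 *	avg_latency += 0.2 * (1500 - 1000);	// -> 1100
 *
 * For the RFC 3393/5481-style jitter estimate with gain 1/16, a previous
 * jitter of 40 cycles, prev_latency = 1000 and latency[i] = 1500 give
 * |D(i-1,i)| = 500, so:
 *
 *	jitter += (500 - 40) / 16;		// -> 68.75
 *
 * Both values are kept in TSC cycles here and only converted to
 * nanoseconds when exported via rte_latencystats_update() or
 * rte_latencystats_get().
 */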
int
rte_latencystats_init(uint64_t app_samp_intvl,
		rte_latency_stats_flow_type_fn user_cb)
{
	unsigned int i;
	uint16_t pid;
	uint16_t qid;
	struct rxtx_cbs *cbs = NULL;
	const char *ptr_strings[NUM_LATENCY_STATS] = {0};
	const struct rte_memzone *mz = NULL;
	const unsigned int flags = 0;
	int ret;

	if (rte_memzone_lookup(MZ_RTE_LATENCY_STATS))
		return -EEXIST;

	/* Allocate stats in shared memory for multi process support */
	mz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*glob_stats),
					rte_socket_id(), flags);
	if (mz == NULL) {
		RTE_LOG(ERR, LATENCY_STATS, "Cannot reserve memory: %s:%d\n",
			__func__, __LINE__);
		return -ENOMEM;
	}

	glob_stats = mz->addr;
	rte_spinlock_init(&glob_stats->lock);
	samp_intvl = app_samp_intvl * latencystat_cycles_per_ns();

	/* Register latency stats with stats library */
	for (i = 0; i < NUM_LATENCY_STATS; i++)
		ptr_strings[i] = lat_stats_strings[i].name;

	latency_stats_index = rte_metrics_reg_names(ptr_strings,
							NUM_LATENCY_STATS);
	if (latency_stats_index < 0) {
		RTE_LOG(DEBUG, LATENCY_STATS,
			"Failed to register latency stats names\n");
		return -1;
	}

	/* Register mbuf field and flag for Rx timestamp */
	ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
			&timestamp_dynflag);
	if (ret != 0) {
		RTE_LOG(ERR, LATENCY_STATS,
			"Cannot register mbuf field/flag for timestamp\n");
		return -rte_errno;
	}

	/* Register Rx/Tx callbacks */
	RTE_ETH_FOREACH_DEV(pid) {
		struct rte_eth_dev_info dev_info;

		ret = rte_eth_dev_info_get(pid, &dev_info);
		if (ret != 0) {
			RTE_LOG(INFO, LATENCY_STATS,
				"Error during getting device (port %u) info: %s\n",
				pid, strerror(-ret));

			continue;
		}

		for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
			cbs = &rx_cbs[pid][qid];
			cbs->cb = rte_eth_add_first_rx_callback(pid, qid,
					add_time_stamps, user_cb);
			if (!cbs->cb)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to register Rx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
		for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
			cbs = &tx_cbs[pid][qid];
			cbs->cb = rte_eth_add_tx_callback(pid, qid,
					calc_latency, user_cb);
			if (!cbs->cb)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to register Tx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
	}
	return 0;
}
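/*
 * Typical wiring from an application (editor's sketch; assumes EAL is
 * initialized and all ports are started before the call, error handling
 * and the hypothetical quit flag / forwarding loop elided):
 *
 *	// Sample roughly once per millisecond (interval is in ns).
 *	if (rte_latencystats_init(1000000, NULL) < 0)
 *		rte_exit(EXIT_FAILURE, "latencystats init failed\n");
 *
 *	while (!quit) {
 *		do_rx_tx_work();
 *		// Periodically push the aggregates into rte_metrics.
 *		rte_latencystats_update();
 *	}
 *
 *	rte_latencystats_uninit();
 *
 * Ports started after rte_latencystats_init() do not get the Rx/Tx
 * callbacks, since registration happens once over RTE_ETH_FOREACH_DEV().
 */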
int
rte_latencystats_uninit(void)
{
	uint16_t pid;
	uint16_t qid;
	int ret = 0;
	struct rxtx_cbs *cbs = NULL;
	const struct rte_memzone *mz = NULL;

	/* Deregister Rx/Tx callbacks */
	RTE_ETH_FOREACH_DEV(pid) {
		struct rte_eth_dev_info dev_info;

		ret = rte_eth_dev_info_get(pid, &dev_info);
		if (ret != 0) {
			RTE_LOG(INFO, LATENCY_STATS,
				"Error during getting device (port %u) info: %s\n",
				pid, strerror(-ret));

			continue;
		}

		for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
			cbs = &rx_cbs[pid][qid];
			ret = rte_eth_remove_rx_callback(pid, qid, cbs->cb);
			if (ret)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to remove Rx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
		for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
			cbs = &tx_cbs[pid][qid];
			ret = rte_eth_remove_tx_callback(pid, qid, cbs->cb);
			if (ret)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to remove Tx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
	}

	/* Free up the memzone */
	mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
	if (mz)
		rte_memzone_free(mz);

	return 0;
}

int
rte_latencystats_get_names(struct rte_metric_name *names, uint16_t size)
{
	unsigned int i;

	if (names == NULL || size < NUM_LATENCY_STATS)
		return NUM_LATENCY_STATS;

	for (i = 0; i < NUM_LATENCY_STATS; i++)
		strlcpy(names[i].name, lat_stats_strings[i].name,
			sizeof(names[i].name));

	return NUM_LATENCY_STATS;
}

int
rte_latencystats_get(struct rte_metric_value *values, uint16_t size)
{
	if (size < NUM_LATENCY_STATS || values == NULL)
		return NUM_LATENCY_STATS;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		const struct rte_memzone *mz;

		mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
		if (mz == NULL) {
			RTE_LOG(ERR, LATENCY_STATS,
				"Latency stats memzone not found\n");
			return -ENOMEM;
		}
		glob_stats = mz->addr;
	}

	/* Retrieve latency stats */
	rte_latencystats_fill_values(values);

	return NUM_LATENCY_STATS;
}
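/*
 * Reading the stats back (editor's sketch; array sizing is an assumption
 * based on the behavior above, where a NULL/undersized call returns the
 * number of stats):
 *
 *	int n = rte_latencystats_get_names(NULL, 0);
 *	struct rte_metric_name names[n];
 *	struct rte_metric_value values[n];
 *
 *	rte_latencystats_get_names(names, n);
 *	rte_latencystats_get(values, n);
 *	for (int i = 0; i < n; i++)
 *		printf("%s: %" PRIu64 "\n", names[i].name, values[i].value);
 *
 * In a secondary process, rte_latencystats_get() re-attaches to the
 * "rte_latencystats" memzone before filling in the values.
 */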