/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <math.h>

#include <rte_string_fns.h>
#include <rte_mbuf_dyn.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_metrics.h>
#include <rte_memzone.h>
#include <rte_lcore.h>
#include <rte_spinlock.h>
#include <rte_errno.h>

#include "rte_latencystats.h"

/** Nanoseconds per second */
#define NS_PER_SEC 1E9

/** Clock cycles per nanosecond */
static uint64_t
latencystat_cycles_per_ns(void)
{
	return rte_get_timer_hz() / NS_PER_SEC;
}

/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_LATENCY_STATS RTE_LOGTYPE_USER1

static uint64_t timestamp_dynflag;
static int timestamp_dynfield_offset = -1;

static inline rte_mbuf_timestamp_t *
timestamp_dynfield(struct rte_mbuf *mbuf)
{
	return RTE_MBUF_DYNFIELD(mbuf,
			timestamp_dynfield_offset, rte_mbuf_timestamp_t *);
}

static const char *MZ_RTE_LATENCY_STATS = "rte_latencystats";
static int latency_stats_index;
static uint64_t samp_intvl;
static uint64_t timer_tsc;
static uint64_t prev_tsc;

struct rte_latency_stats {
	float min_latency; /**< Minimum latency in nanoseconds */
	float avg_latency; /**< Average latency in nanoseconds */
	float max_latency; /**< Maximum latency in nanoseconds */
	float jitter; /**< Latency variation */
	rte_spinlock_t lock; /**< Latency calculation lock */
};

static struct rte_latency_stats *glob_stats;

struct rxtx_cbs {
	const struct rte_eth_rxtx_callback *cb;
};

static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
static struct rxtx_cbs tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];

struct latency_stats_nameoff {
	char name[RTE_ETH_XSTATS_NAME_SIZE];
	unsigned int offset;
};

static const struct latency_stats_nameoff lat_stats_strings[] = {
	{"min_latency_ns", offsetof(struct rte_latency_stats, min_latency)},
	{"avg_latency_ns", offsetof(struct rte_latency_stats, avg_latency)},
	{"max_latency_ns", offsetof(struct rte_latency_stats, max_latency)},
	{"jitter_ns", offsetof(struct rte_latency_stats, jitter)},
};

#define NUM_LATENCY_STATS (sizeof(lat_stats_strings) / \
				sizeof(lat_stats_strings[0]))

int32_t
rte_latencystats_update(void)
{
	unsigned int i;
	float *stats_ptr = NULL;
	uint64_t values[NUM_LATENCY_STATS] = {0};
	int ret;

	for (i = 0; i < NUM_LATENCY_STATS; i++) {
		stats_ptr = RTE_PTR_ADD(glob_stats,
				lat_stats_strings[i].offset);
		values[i] = (uint64_t)floor((*stats_ptr)/
				latencystat_cycles_per_ns());
	}

	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL,
					latency_stats_index,
					values, NUM_LATENCY_STATS);
	if (ret < 0)
		RTE_LOG(INFO, LATENCY_STATS, "Failed to push the stats\n");

	return ret;
}

static void
rte_latencystats_fill_values(struct rte_metric_value *values)
{
	unsigned int i;
	float *stats_ptr = NULL;

	for (i = 0; i < NUM_LATENCY_STATS; i++) {
		stats_ptr = RTE_PTR_ADD(glob_stats,
				lat_stats_strings[i].offset);
		values[i].key = i;
		values[i].value = (uint64_t)floor((*stats_ptr)/
				latencystat_cycles_per_ns());
	}
}
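/*
 * Note on unit conversion (illustrative; the 2.5 GHz figure is an assumed
 * example, not a requirement of this library): latencystat_cycles_per_ns()
 * truncates toward zero because it returns an integer. On a hypothetical
 * 2.5 GHz timer the true ratio is 2.5 cycles per ns but the function
 * returns 2, so a requested sample interval of 1000 ns becomes
 *
 *	samp_intvl = 1000 * 2 = 2000 cycles, i.e. about 800 ns of wall time,
 *
 * and the nanosecond values reported above carry the same rounding error.
 * A timer running below 1 GHz would make the function return 0, so the
 * divisions above would divide by zero on such platforms.
 */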
static uint16_t
add_time_stamps(uint16_t pid __rte_unused,
		uint16_t qid __rte_unused,
		struct rte_mbuf **pkts,
		uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused,
		void *user_cb __rte_unused)
{
	unsigned int i;
	uint64_t diff_tsc, now;

	/*
	 * For every sample interval,
	 * a time stamp is marked on one received packet.
	 */
	now = rte_rdtsc();
	for (i = 0; i < nb_pkts; i++) {
		diff_tsc = now - prev_tsc;
		timer_tsc += diff_tsc;

		if ((pkts[i]->ol_flags & timestamp_dynflag) == 0
				&& (timer_tsc >= samp_intvl)) {
			*timestamp_dynfield(pkts[i]) = now;
			pkts[i]->ol_flags |= timestamp_dynflag;
			timer_tsc = 0;
		}
		prev_tsc = now;
		now = rte_rdtsc();
	}

	return nb_pkts;
}

static uint16_t
calc_latency(uint16_t pid __rte_unused,
		uint16_t qid __rte_unused,
		struct rte_mbuf **pkts,
		uint16_t nb_pkts,
		void *_ __rte_unused)
{
	unsigned int i, cnt = 0;
	uint64_t now;
	float latency[nb_pkts];
	static float prev_latency;
	/*
	 * Alpha represents the degree of weighting decrease in EWMA,
	 * a constant smoothing factor between 0 and 1. The value
	 * is used below for measuring average latency.
	 */
	const float alpha = 0.2;

	now = rte_rdtsc();
	for (i = 0; i < nb_pkts; i++) {
		if (pkts[i]->ol_flags & timestamp_dynflag)
			latency[cnt++] = now - *timestamp_dynfield(pkts[i]);
	}

	rte_spinlock_lock(&glob_stats->lock);
	for (i = 0; i < cnt; i++) {
		/*
		 * The jitter is calculated as the statistical mean of
		 * interpacket delay variation. The "jitter estimate" is
		 * computed by taking the absolute values of the ipdv
		 * sequence and applying an exponential filter with
		 * parameter 1/16 to generate the estimate, i.e.
		 * J = J + (|D(i-1,i)| - J)/16, where J is the jitter and
		 * D(i-1,i) is the difference in latency of two consecutive
		 * packets i-1 and i.
		 * Reference: RFC 5481 sec 4.1, RFC 3393 sec 4.5, RFC 1889.
		 */
		glob_stats->jitter += (fabsf(prev_latency - latency[i])
					- glob_stats->jitter)/16;
		/*
		 * Track min and max independently so the first sample
		 * updates both (an else-if chain would leave max_latency
		 * at zero until a second, larger sample arrived).
		 */
		if (glob_stats->min_latency == 0 ||
				latency[i] < glob_stats->min_latency)
			glob_stats->min_latency = latency[i];
		if (latency[i] > glob_stats->max_latency)
			glob_stats->max_latency = latency[i];
		/*
		 * The average latency is measured using an exponentially
		 * weighted moving average (EWMA):
		 * https://en.wikipedia.org/wiki/Moving_average
		 */
		glob_stats->avg_latency +=
			alpha * (latency[i] - glob_stats->avg_latency);
		prev_latency = latency[i];
	}
	rte_spinlock_unlock(&glob_stats->lock);

	return nb_pkts;
}
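/*
 * Worked example of the two filters above (illustrative numbers only):
 * with alpha = 0.2, a current average of 100 ns and a new sample of 200 ns,
 * the average becomes 100 + 0.2 * (200 - 100) = 120 ns. For the jitter
 * filter, if J = 10 ns, the previous latency was 150 ns and the new sample
 * is 200 ns, then J becomes 10 + (|150 - 200| - 10)/16 = 12.5 ns. Both are
 * exponential filters: each new sample moves the estimate a fixed fraction
 * (1/5 and 1/16, respectively) of the way toward the latest observation.
 */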
int
rte_latencystats_init(uint64_t app_samp_intvl,
		rte_latency_stats_flow_type_fn user_cb)
{
	unsigned int i;
	uint16_t pid;
	uint16_t qid;
	struct rxtx_cbs *cbs = NULL;
	const char *ptr_strings[NUM_LATENCY_STATS] = {0};
	const struct rte_memzone *mz = NULL;
	const unsigned int flags = 0;
	int ret;

	if (rte_memzone_lookup(MZ_RTE_LATENCY_STATS))
		return -EEXIST;

	/* Allocate stats in shared memory for multi-process support */
	mz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*glob_stats),
					rte_socket_id(), flags);
	if (mz == NULL) {
		RTE_LOG(ERR, LATENCY_STATS, "Cannot reserve memory: %s:%d\n",
			__func__, __LINE__);
		return -ENOMEM;
	}

	glob_stats = mz->addr;
	rte_spinlock_init(&glob_stats->lock);
	samp_intvl = app_samp_intvl * latencystat_cycles_per_ns();

	/* Register latency stats with the stats library */
	for (i = 0; i < NUM_LATENCY_STATS; i++)
		ptr_strings[i] = lat_stats_strings[i].name;

	latency_stats_index = rte_metrics_reg_names(ptr_strings,
							NUM_LATENCY_STATS);
	if (latency_stats_index < 0) {
		RTE_LOG(DEBUG, LATENCY_STATS,
			"Failed to register latency stats names\n");
		return -1;
	}

	/* Register mbuf field and flag for Rx timestamp */
	ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
			&timestamp_dynflag);
	if (ret != 0) {
		RTE_LOG(ERR, LATENCY_STATS,
			"Cannot register mbuf field/flag for timestamp\n");
		return -rte_errno;
	}

	/* Register Rx/Tx callbacks */
	RTE_ETH_FOREACH_DEV(pid) {
		struct rte_eth_dev_info dev_info;

		ret = rte_eth_dev_info_get(pid, &dev_info);
		if (ret != 0) {
			RTE_LOG(INFO, LATENCY_STATS,
				"Error during getting device (port %u) info: %s\n",
				pid, strerror(-ret));

			continue;
		}

		for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
			cbs = &rx_cbs[pid][qid];
			cbs->cb = rte_eth_add_first_rx_callback(pid, qid,
					add_time_stamps, user_cb);
			if (!cbs->cb)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to register Rx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
		for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
			cbs = &tx_cbs[pid][qid];
			cbs->cb = rte_eth_add_tx_callback(pid, qid,
					calc_latency, user_cb);
			if (!cbs->cb)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to register Tx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
	}
	return 0;
}
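/*
 * Typical initialization order (illustrative sketch; the port-setup and
 * polling details are assumptions about the calling application, not part
 * of this library):
 *
 *	rte_eal_init(argc, argv);
 *	... configure and start ethdev ports ...
 *	rte_metrics_init(rte_socket_id());
 *	rte_latencystats_init(1, NULL);		// 1 ns sample interval
 *
 *	// then, periodically from one lcore:
 *	rte_latencystats_update();
 *
 * rte_metrics_init() must run first because rte_latencystats_init()
 * registers its metric names with the metrics library via
 * rte_metrics_reg_names().
 */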
int
rte_latencystats_uninit(void)
{
	uint16_t pid;
	uint16_t qid;
	int ret = 0;
	struct rxtx_cbs *cbs = NULL;
	const struct rte_memzone *mz = NULL;

	/* Deregister Rx/Tx callbacks */
	RTE_ETH_FOREACH_DEV(pid) {
		struct rte_eth_dev_info dev_info;

		ret = rte_eth_dev_info_get(pid, &dev_info);
		if (ret != 0) {
			RTE_LOG(INFO, LATENCY_STATS,
				"Error during getting device (port %u) info: %s\n",
				pid, strerror(-ret));

			continue;
		}

		for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
			cbs = &rx_cbs[pid][qid];
			ret = rte_eth_remove_rx_callback(pid, qid, cbs->cb);
			if (ret)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to remove Rx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
		for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
			cbs = &tx_cbs[pid][qid];
			ret = rte_eth_remove_tx_callback(pid, qid, cbs->cb);
			if (ret)
				RTE_LOG(INFO, LATENCY_STATS,
					"Failed to remove Tx callback for pid=%d, qid=%d\n",
					pid, qid);
		}
	}

	/* Free up the memzone */
	mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
	if (mz)
		rte_memzone_free(mz);

	return 0;
}

int
rte_latencystats_get_names(struct rte_metric_name *names, uint16_t size)
{
	unsigned int i;

	if (names == NULL || size < NUM_LATENCY_STATS)
		return NUM_LATENCY_STATS;

	for (i = 0; i < NUM_LATENCY_STATS; i++)
		strlcpy(names[i].name, lat_stats_strings[i].name,
			sizeof(names[i].name));

	return NUM_LATENCY_STATS;
}

int
rte_latencystats_get(struct rte_metric_value *values, uint16_t size)
{
	if (size < NUM_LATENCY_STATS || values == NULL)
		return NUM_LATENCY_STATS;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		const struct rte_memzone *mz;

		mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
		if (mz == NULL) {
			RTE_LOG(ERR, LATENCY_STATS,
				"Latency stats memzone not found\n");
			return -ENOMEM;
		}
		glob_stats = mz->addr;
	}

	/* Retrieve latency stats */
	rte_latencystats_fill_values(values);

	return NUM_LATENCY_STATS;
}
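/*
 * Example consumer (illustrative sketch, not part of this library): both
 * getters follow the usual DPDK two-call pattern -- called with NULL or a
 * too-small size they return the number of stats, so a reader can size its
 * buffers first:
 *
 *	int n = rte_latencystats_get_names(NULL, 0);
 *	struct rte_metric_name *names = calloc(n, sizeof(*names));
 *	struct rte_metric_value *vals = calloc(n, sizeof(*vals));
 *
 *	rte_latencystats_get_names(names, n);
 *	rte_latencystats_get(vals, n);
 *	for (int i = 0; i < n; i++)
 *		printf("%s: %" PRIu64 "\n",
 *			names[vals[i].key].name, vals[i].value);
 *
 * In a secondary process, rte_latencystats_get() looks up the shared
 * memzone on first use, so the primary process must have called
 * rte_latencystats_init() beforehand.
 */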